{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b5a749ab",
   "metadata": {},
   "source": [
    "# 数据导入"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4883c85c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data import\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# Single place to repoint the notebook at another copy of the dataset;\n",
    "# every table below is read relative to this directory.\n",
    "DATA_DIR = Path(r\"C:\\Users\\阿璃\\Desktop\\数据分析大作业项目题目（三选一）\\B题\\数据\")\n",
    "\n",
    "user1 = pd.read_csv(DATA_DIR / \"login_day.csv\")\n",
    "user2 = pd.read_csv(DATA_DIR / \"result.csv\")\n",
    "user3 = pd.read_csv(DATA_DIR / \"user_info.csv\")\n",
    "user4 = pd.read_csv(DATA_DIR / \"visit_info.csv\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dc35f112",
   "metadata": {},
   "source": [
    "# 数据清洗"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "71cb48d2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "False    135617\n",
      "dtype: int64\n",
      "False    4639\n",
      "dtype: int64\n",
      "False    135968\n",
      "dtype: int64\n",
      "False    135617\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Count fully duplicated rows in each of the four tables.\n",
    "for table in (user1, user2, user3, user4):\n",
    "    print(table.duplicated().value_counts())\n",
    "# Result: no duplicates."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "id": "d2a45622",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<bound method NDFrame._add_numeric_operations.<locals>.any of         user_id  login_day  login_diff_time  distance_day  login_time  \\\n",
       "0         False      False            False         False       False   \n",
       "1         False      False            False         False       False   \n",
       "2         False      False            False         False       False   \n",
       "3         False      False            False         False       False   \n",
       "4         False      False            False         False       False   \n",
       "...         ...        ...              ...           ...         ...   \n",
       "135612    False      False            False         False       False   \n",
       "135613    False      False            False         False       False   \n",
       "135614    False      False            False         False       False   \n",
       "135615    False      False            False         False       False   \n",
       "135616    False      False            False         False       False   \n",
       "\n",
       "        launch_time  chinese_subscribe_num  math_subscribe_num  add_friend  \\\n",
       "0             False                  False               False       False   \n",
       "1             False                  False               False       False   \n",
       "2             False                  False               False       False   \n",
       "3             False                  False               False       False   \n",
       "4             False                  False               False       False   \n",
       "...             ...                    ...                 ...         ...   \n",
       "135612        False                  False               False       False   \n",
       "135613        False                  False               False       False   \n",
       "135614        False                  False               False       False   \n",
       "135615        False                  False               False       False   \n",
       "135616        False                  False               False       False   \n",
       "\n",
       "        add_group  camp_num  learn_num  finish_num  study_num  coupon  \\\n",
       "0           False     False      False       False      False   False   \n",
       "1           False     False      False       False      False   False   \n",
       "2           False     False      False       False      False   False   \n",
       "3           False     False      False       False      False   False   \n",
       "4           False     False      False       False      False   False   \n",
       "...           ...       ...        ...         ...        ...     ...   \n",
       "135612      False     False      False       False      False   False   \n",
       "135613      False     False      False       False      False   False   \n",
       "135614      False     False      False       False      False   False   \n",
       "135615      False     False      False       False      False   False   \n",
       "135616      False     False      False       False      False   False   \n",
       "\n",
       "        course_order_num  \n",
       "0                  False  \n",
       "1                  False  \n",
       "2                  False  \n",
       "3                  False  \n",
       "4                  False  \n",
       "...                  ...  \n",
       "135612             False  \n",
       "135613             False  \n",
       "135614             False  \n",
       "135615             False  \n",
       "135616             False  \n",
       "\n",
       "[135617 rows x 16 columns]>"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First check whether the four tables have missing values; handle them if so.\n",
    "# any() has to be called: a bare `.any` only displays the bound method,\n",
    "# not the per-column True/False summary.\n",
    "user1.isnull().any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "id": "7c09ba85",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<bound method NDFrame._add_numeric_operations.<locals>.any of       user_id  result\n",
       "0       False   False\n",
       "1       False   False\n",
       "2       False   False\n",
       "3       False   False\n",
       "4       False   False\n",
       "...       ...     ...\n",
       "4634    False   False\n",
       "4635    False   False\n",
       "4636    False   False\n",
       "4637    False   False\n",
       "4638    False   False\n",
       "\n",
       "[4639 rows x 2 columns]>"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column flag: True where user2 has at least one missing value.\n",
    "# any() has to be called; a bare `.any` only displays the bound method.\n",
    "user2.isnull().any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "id": "36d761aa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<bound method NDFrame._add_numeric_operations.<locals>.any of         user_id  first_order_time  first_order_price  age_month  city_num  \\\n",
       "0         False             False              False      False     False   \n",
       "1         False             False              False      False     False   \n",
       "2         False             False              False      False      True   \n",
       "3         False             False              False      False      True   \n",
       "4         False             False              False      False      True   \n",
       "...         ...               ...                ...        ...       ...   \n",
       "135963    False             False              False      False     False   \n",
       "135964    False             False              False      False     False   \n",
       "135965    False             False              False      False     False   \n",
       "135966    False             False              False      False     False   \n",
       "135967    False             False              False      False     False   \n",
       "\n",
       "        platform_num  model_num  app_num  \n",
       "0              False      False    False  \n",
       "1              False      False    False  \n",
       "2              False      False    False  \n",
       "3              False      False    False  \n",
       "4              False      False    False  \n",
       "...              ...        ...      ...  \n",
       "135963         False      False    False  \n",
       "135964         False      False    False  \n",
       "135965         False      False    False  \n",
       "135966         False      False    False  \n",
       "135967         False      False    False  \n",
       "\n",
       "[135968 rows x 8 columns]>"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column flag: True where user3 has at least one missing value.\n",
    "# any() has to be called; a bare `.any` only displays the bound method.\n",
    "user3.isnull().any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "id": "74224189",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<bound method NDFrame._add_numeric_operations.<locals>.any of         user_id  main_home  main_home2  mainpage  schoolreportpage  main_mime  \\\n",
       "0         False      False       False     False             False      False   \n",
       "1         False      False       False     False             False      False   \n",
       "2         False      False       False     False             False      False   \n",
       "3         False      False       False     False             False      False   \n",
       "4         False      False       False     False             False      False   \n",
       "...         ...        ...         ...       ...               ...        ...   \n",
       "135612    False      False       False     False             False      False   \n",
       "135613    False      False       False     False             False      False   \n",
       "135614    False      False       False     False             False      False   \n",
       "135615    False      False       False     False             False      False   \n",
       "135616    False      False       False     False             False      False   \n",
       "\n",
       "        lightcoursetab  main_learnpark  partnergamebarrierspage  \\\n",
       "0                False           False                    False   \n",
       "1                False           False                    False   \n",
       "2                False           False                    False   \n",
       "3                False           False                    False   \n",
       "4                False           False                    False   \n",
       "...                ...             ...                      ...   \n",
       "135612           False           False                    False   \n",
       "135613           False           False                    False   \n",
       "135614           False           False                    False   \n",
       "135615           False           False                    False   \n",
       "135616           False           False                    False   \n",
       "\n",
       "        evaulationcenter  ...  video_read  next_nize  answer_task  \\\n",
       "0                  False  ...       False      False        False   \n",
       "1                  False  ...       False      False        False   \n",
       "2                  False  ...       False      False        False   \n",
       "3                  False  ...       False      False        False   \n",
       "4                  False  ...       False      False        False   \n",
       "...                  ...  ...         ...        ...          ...   \n",
       "135612             False  ...       False      False        False   \n",
       "135613             False  ...       False      False        False   \n",
       "135614             False  ...       False      False        False   \n",
       "135615             False  ...       False      False        False   \n",
       "135616             False  ...       False      False        False   \n",
       "\n",
       "        chapter_module  course_tab  slide_subscribe  baby_info  \\\n",
       "0                False       False            False      False   \n",
       "1                False       False            False      False   \n",
       "2                False       False            False      False   \n",
       "3                False       False            False      False   \n",
       "4                False       False            False      False   \n",
       "...                ...         ...              ...        ...   \n",
       "135612           False       False            False      False   \n",
       "135613           False       False            False      False   \n",
       "135614           False       False            False      False   \n",
       "135615           False       False            False      False   \n",
       "135616           False       False            False      False   \n",
       "\n",
       "        click_notunlocked  share  click_dialog  \n",
       "0                   False  False         False  \n",
       "1                   False  False         False  \n",
       "2                   False  False         False  \n",
       "3                   False  False         False  \n",
       "4                   False  False         False  \n",
       "...                   ...    ...           ...  \n",
       "135612              False  False         False  \n",
       "135613              False  False         False  \n",
       "135614              False  False         False  \n",
       "135615              False  False         False  \n",
       "135616              False  False         False  \n",
       "\n",
       "[135617 rows x 26 columns]>"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column flag: True where user4 has at least one missing value.\n",
    "# any() has to be called; a bare `.any` only displays the bound method.\n",
    "user4.isnull().any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "id": "94ea695c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0            广州\n",
       "1            徐州\n",
       "2            重庆\n",
       "3            重庆\n",
       "4            重庆\n",
       "          ...  \n",
       "135963       徐州\n",
       "135964       保定\n",
       "135965       西安\n",
       "135966       泉州\n",
       "135967    error\n",
       "Name: city_num, Length: 135968, dtype: object"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The check above shows that user_info has missing values. Per the original\n",
    "# author's note, dropping them would lose more than 10% of the data, so they\n",
    "# are filled instead.\n",
    "most_common_city = user3['city_num'].mode()[0]  # fill value: the column's mode\n",
    "city_num = user3['city_num'].fillna(most_common_city)\n",
    "# NOTE(review): the saved output still contains the literal value 'error' in\n",
    "# city_num, which fillna does not touch - confirm whether it should count as missing.\n",
    "city_num"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "id": "e46fef82",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the filled column back into user3.\n",
    "user3[\"city_num\"] = city_num"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "id": "c58f9936",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>first_order_time</th>\n",
       "      <th>first_order_price</th>\n",
       "      <th>age_month</th>\n",
       "      <th>city_num</th>\n",
       "      <th>platform_num</th>\n",
       "      <th>model_num</th>\n",
       "      <th>app_num</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2000001555945280</td>\n",
       "      <td>2018/12/23 11:44</td>\n",
       "      <td>0.00</td>\n",
       "      <td>32</td>\n",
       "      <td>广州</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>11.2707</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2000001556645228</td>\n",
       "      <td>2019/1/11 9:46</td>\n",
       "      <td>0.00</td>\n",
       "      <td>127</td>\n",
       "      <td>徐州</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>4.9689</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2000001558047804</td>\n",
       "      <td>2018/12/26 11:04</td>\n",
       "      <td>0.00</td>\n",
       "      <td>92</td>\n",
       "      <td>重庆</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>6.6392</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2000001558146467</td>\n",
       "      <td>2018/12/31 8:47</td>\n",
       "      <td>0.00</td>\n",
       "      <td>83</td>\n",
       "      <td>重庆</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>12.2222</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2000001558146878</td>\n",
       "      <td>2019/1/28 1:53</td>\n",
       "      <td>0.00</td>\n",
       "      <td>47</td>\n",
       "      <td>重庆</td>\n",
       "      <td>13.5570</td>\n",
       "      <td>10.3925</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135963</th>\n",
       "      <td>2000002945827404</td>\n",
       "      <td>2019/5/13 20:20</td>\n",
       "      <td>0.00</td>\n",
       "      <td>60</td>\n",
       "      <td>徐州</td>\n",
       "      <td>13.5570</td>\n",
       "      <td>10.8966</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135964</th>\n",
       "      <td>2000002945862051</td>\n",
       "      <td>2019/5/13 21:35</td>\n",
       "      <td>0.00</td>\n",
       "      <td>60</td>\n",
       "      <td>保定</td>\n",
       "      <td>13.5570</td>\n",
       "      <td>8.1782</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135965</th>\n",
       "      <td>2000002945866461</td>\n",
       "      <td>2019/5/13 21:46</td>\n",
       "      <td>0.00</td>\n",
       "      <td>66</td>\n",
       "      <td>西安</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>6.5617</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135966</th>\n",
       "      <td>2000002945873156</td>\n",
       "      <td>2019/5/13 22:10</td>\n",
       "      <td>0.00</td>\n",
       "      <td>24</td>\n",
       "      <td>泉州</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>15.3061</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135967</th>\n",
       "      <td>2000002946803184</td>\n",
       "      <td>2019/5/13 12:45</td>\n",
       "      <td>0.01</td>\n",
       "      <td>45</td>\n",
       "      <td>error</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>8.8308</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>135968 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 user_id  first_order_time  first_order_price  age_month  \\\n",
       "0       2000001555945280  2018/12/23 11:44               0.00         32   \n",
       "1       2000001556645228    2019/1/11 9:46               0.00        127   \n",
       "2       2000001558047804  2018/12/26 11:04               0.00         92   \n",
       "3       2000001558146467   2018/12/31 8:47               0.00         83   \n",
       "4       2000001558146878    2019/1/28 1:53               0.00         47   \n",
       "...                  ...               ...                ...        ...   \n",
       "135963  2000002945827404   2019/5/13 20:20               0.00         60   \n",
       "135964  2000002945862051   2019/5/13 21:35               0.00         60   \n",
       "135965  2000002945866461   2019/5/13 21:46               0.00         66   \n",
       "135966  2000002945873156   2019/5/13 22:10               0.00         24   \n",
       "135967  2000002946803184   2019/5/13 12:45               0.01         45   \n",
       "\n",
       "       city_num  platform_num  model_num  app_num  \n",
       "0            广州        9.2969    11.2707        1  \n",
       "1            徐州        9.2969     4.9689        1  \n",
       "2            重庆        9.2969     6.6392        1  \n",
       "3            重庆        9.2969    12.2222        1  \n",
       "4            重庆       13.5570    10.3925        1  \n",
       "...         ...           ...        ...      ...  \n",
       "135963       徐州       13.5570    10.8966        1  \n",
       "135964       保定       13.5570     8.1782        1  \n",
       "135965       西安        9.2969     6.5617        1  \n",
       "135966       泉州        9.2969    15.3061        1  \n",
       "135967    error        9.2969     8.8308        1  \n",
       "\n",
       "[135968 rows x 8 columns]"
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show user3 after city_num has been written back, to inspect the result.\n",
    "user3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "id": "8a82de9c",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "            user_id      login_day  login_diff_time   distance_day  \\\n",
      "count  1.356170e+05  135617.000000    135617.000000  135617.000000   \n",
      "mean   2.000003e+15       4.183259         1.086262     136.364519   \n",
      "std    2.499964e+08       2.363428         1.933018     135.588232   \n",
      "min    2.000002e+15      -1.000000        -1.000000   -1275.000000   \n",
      "25%    2.000002e+15       3.000000         0.750000      38.000000   \n",
      "50%    2.000002e+15       4.000000         1.000000      84.000000   \n",
      "75%    2.000003e+15       6.000000         1.200000     180.000000   \n",
      "max    2.000003e+15     108.000000       135.000000    6588.000000   \n",
      "\n",
      "          login_time    launch_time  chinese_subscribe_num  \\\n",
      "count  135617.000000  135617.000000          135617.000000   \n",
      "mean       38.096684       0.511116               0.306540   \n",
      "std        57.639389       0.890522               0.461058   \n",
      "min         0.000000       0.000000               0.000000   \n",
      "25%         7.000000       0.000000               0.000000   \n",
      "50%        21.000000       0.000000               0.000000   \n",
      "75%        44.000000       1.000000               1.000000   \n",
      "max      1480.000000      76.000000               1.000000   \n",
      "\n",
      "       math_subscribe_num     add_friend      add_group       camp_num  \\\n",
      "count       135617.000000  135617.000000  135617.000000  135617.000000   \n",
      "mean             0.073671       0.996180       0.996180       1.607601   \n",
      "std              0.261235       0.061685       0.061685       0.960247   \n",
      "min              0.000000       0.000000       0.000000       0.000000   \n",
      "25%              0.000000       1.000000       1.000000       1.000000   \n",
      "50%              0.000000       1.000000       1.000000       1.000000   \n",
      "75%              0.000000       1.000000       1.000000       2.000000   \n",
      "max              1.000000       1.000000       1.000000       6.000000   \n",
      "\n",
      "           learn_num     finish_num      study_num         coupon  \\\n",
      "count  135617.000000  135617.000000  135617.000000  135617.000000   \n",
      "mean        3.312955       2.692140       0.169647       0.158970   \n",
      "std         2.966821       2.886859       0.375324       0.914696   \n",
      "min         0.000000       0.000000       0.000000       0.000000   \n",
      "25%         1.000000       0.000000       0.000000       0.000000   \n",
      "50%         3.000000       2.000000       0.000000       0.000000   \n",
      "75%         5.000000       5.000000       0.000000       0.000000   \n",
      "max        25.000000      25.000000       1.000000     112.000000   \n",
      "\n",
      "       course_order_num  \n",
      "count     135617.000000  \n",
      "mean           0.105621  \n",
      "std            0.552803  \n",
      "min            0.000000  \n",
      "25%            0.000000  \n",
      "50%            0.000000  \n",
      "75%            0.000000  \n",
      "max           24.000000  \n",
      "            user_id  result\n",
      "count  4.639000e+03  4639.0\n",
      "mean   2.000002e+15     1.0\n",
      "std    2.371429e+08     0.0\n",
      "min    2.000002e+15     1.0\n",
      "25%    2.000002e+15     1.0\n",
      "50%    2.000002e+15     1.0\n",
      "75%    2.000003e+15     1.0\n",
      "max    2.000003e+15     1.0\n",
      "            user_id  first_order_price      age_month   platform_num  \\\n",
      "count  1.359680e+05      135968.000000  135968.000000  135968.000000   \n",
      "mean   2.000003e+15           1.317308      66.456519      10.250447   \n",
      "std    2.492006e+08           9.961491     209.718223       1.775662   \n",
      "min    2.000002e+15           0.000000       0.000000       9.296900   \n",
      "25%    2.000002e+15           0.000000      51.000000       9.296900   \n",
      "50%    2.000002e+15           0.000000      63.000000       9.296900   \n",
      "75%    2.000003e+15           0.000000      78.000000       9.296900   \n",
      "max    2.000003e+15        2099.000000   24245.000000      13.557000   \n",
      "\n",
      "           model_num   app_num  \n",
      "count  135968.000000  135968.0  \n",
      "mean        9.417481       1.0  \n",
      "std         4.446427       0.0  \n",
      "min         0.000000       1.0  \n",
      "25%         6.346200       1.0  \n",
      "50%         8.920000       1.0  \n",
      "75%        11.440700       1.0  \n",
      "max       100.000000       1.0  \n",
      "            user_id      main_home     main_home2       mainpage  \\\n",
      "count  1.356170e+05  135617.000000  135617.000000  135617.000000   \n",
      "mean   2.000003e+15      56.539593      47.509796       5.243915   \n",
      "std    2.499964e+08      51.765433      48.725916      11.059816   \n",
      "min    2.000002e+15       0.000000       0.000000       0.000000   \n",
      "25%    2.000002e+15      12.000000      14.000000       1.000000   \n",
      "50%    2.000002e+15      48.000000      35.000000       3.000000   \n",
      "75%    2.000003e+15      87.000000      66.000000       7.000000   \n",
      "max    2.000003e+15    2172.000000    2685.000000    2847.000000   \n",
      "\n",
      "       schoolreportpage      main_mime  lightcoursetab  main_learnpark  \\\n",
      "count     135617.000000  135617.000000   135617.000000   135617.000000   \n",
      "mean           3.787342       0.773502        1.127794        1.056534   \n",
      "std            6.174570       1.977206        2.556154        3.252270   \n",
      "min            0.000000       0.000000        0.000000        0.000000   \n",
      "25%            0.000000       0.000000        0.000000        0.000000   \n",
      "50%            1.000000       0.000000        0.000000        0.000000   \n",
      "75%            6.000000       1.000000        1.000000        1.000000   \n",
      "max          482.000000     117.000000      271.000000      618.000000   \n",
      "\n",
      "       partnergamebarrierspage  evaulationcenter  ...     video_read  \\\n",
      "count            135617.000000      135617.00000  ...  135617.000000   \n",
      "mean                  0.819897           2.11879  ...      23.695694   \n",
      "std                   5.941076           6.24954  ...      27.090218   \n",
      "min                   0.000000           0.00000  ...       0.000000   \n",
      "25%                   0.000000           0.00000  ...       0.000000   \n",
      "50%                   0.000000           0.00000  ...      17.000000   \n",
      "75%                   0.000000           2.00000  ...      40.000000   \n",
      "max                 323.000000        1153.00000  ...    2269.000000   \n",
      "\n",
      "           next_nize    answer_task  chapter_module     course_tab  \\\n",
      "count  135617.000000  135617.000000   135617.000000  135617.000000   \n",
      "mean       23.037901      12.389693       10.213543      14.259053   \n",
      "std        30.152456      16.126531       16.058347      27.228271   \n",
      "min         0.000000       0.000000        0.000000       0.000000   \n",
      "25%         0.000000       0.000000        1.000000       0.000000   \n",
      "50%        17.000000       7.000000        5.000000       4.000000   \n",
      "75%        35.000000      19.000000       13.000000      16.000000   \n",
      "max      1271.000000     760.000000     1157.000000    1743.000000   \n",
      "\n",
      "       slide_subscribe      baby_info  click_notunlocked          share  \\\n",
      "count    135617.000000  135617.000000      135617.000000  135617.000000   \n",
      "mean          3.880369       4.240221           1.818511       4.884380   \n",
      "std           4.780812       3.237976           3.271041       6.886525   \n",
      "min           0.000000       0.000000           0.000000       0.000000   \n",
      "25%           1.000000       3.000000           0.000000       0.000000   \n",
      "50%           3.000000       5.000000           1.000000       2.000000   \n",
      "75%           5.000000       5.000000           2.000000       8.000000   \n",
      "max         261.000000     229.000000          83.000000     249.000000   \n",
      "\n",
      "        click_dialog  \n",
      "count  135617.000000  \n",
      "mean        0.976375  \n",
      "std         0.851516  \n",
      "min         0.000000  \n",
      "25%         1.000000  \n",
      "50%         1.000000  \n",
      "75%         1.000000  \n",
      "max       119.000000  \n",
      "\n",
      "[8 rows x 26 columns]\n"
     ]
    }
   ],
   "source": [
    "# Outlier detection plan:\n",
    "#   1. describe() for descriptive statistics of the numeric fields\n",
    "#   2. box plots\n",
    "for table in (user1, user2, user3, user4):\n",
    "    print(table.describe())  # descriptive statistics of the numeric fields"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "id": "364c32de",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "一共有8517个异常数据\n",
      "异常值的最大值为24245\n",
      "异常值的最小值为0\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAc0AAAGsCAYAAAC/7fziAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAjsUlEQVR4nO3df3BU9b3/8dcuZLchDedmjclmy4+bUaT2Jpd+jRqg9Wc1whAg1U5twR2c8eK3tYFhCHPn4j/SToc4Wu10ylgp9zv13l4uuTMVbK8yKekoSEqATLipRIoX7kUDcUMwbnZJTHZD8vn+wXBkSdBPILDgPh8zO5M95727n+OMPj3702OMMQIAAJ/Lm+4FAABwvSCaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWJqY7gWk0/DwsD788EPl5ubK4/GkezkAgDQxxuj06dMKhULyei9+PpnR0fzwww81derUdC8DAHCNOH78uKZMmXLR/RkdzdzcXEln/yFNnjw5zasBAKRLPB7X1KlT3S5cTEZH89xTspMnTyaaAIDPfaluTG8Eqq2t1R133KHc3FwVFBSoqqpK7733XsrM448/Lo/Hk3KZPXt2ykwikdCKFSuUn5+vnJwcLVq0SCdOnEiZiUajCofDchxHjuMoHA6rp6cnZaa9vV0LFy5UTk6O8vPztXLlSiWTybEcEgAA1sYUzV27dulHP/qR9u7dq4aGBp05c0YVFRXq6+tLmZs3b54ikYh72b59e8r+VatWadu2baqrq1NjY6N6e3tVWVmpoaEhd2bJkiVqbW1VfX296uvr1draqnA47O4fGhrSggUL1NfXp8bGRtXV1enVV19VTU3NpfxzAADg85nL0NXVZSSZXbt2uduWLVtmFi9efNHb9PT0mKysLFNXV+du6+joMF6v19TX1xtjjDl06JCRZPbu3evONDU1GUnm8OHDxhhjtm/fbrxer+no6HBntmzZYvx+v4nFYlbrj8ViRpL1PADgi8m2B5f1Oc1YLCZJCgQCKdt37typgoIC3XLLLVq+fLm6urrcfS0tLRocHFRFRYW7LRQKqaSkRHv27JEkNTU1yXEclZeXuzOzZ8+W4zgpMyUlJQqFQu7MQw89pEQioZaWllHXm0gkFI/HUy4AANi65GgaY7R69Wp985vfVElJibt9/vz52rx5s95880298MILam5u1v33369EIiFJ6uzslM/nU15eXsr9FRYWqrOz050pKCgY8ZgFBQUpM4WFhSn78/Ly5PP53JkL1dbWuq+ROo7Dx00AAGNyye+era6u1jvvvKPGxsaU7Y8++qj7d0lJiW6//XZNnz5db7zxhh5++OGL3p8xJuVdS6O9g+lSZs63du1arV692r1+7i3GAADYuKQzzRUrVugPf/iD3nrrrc/8EKgkFRUVafr06Tpy5IgkKRgMKplMKhqNpsx1dXW5Z47BYFAnT54ccV+nTp1KmbnwjDIajWpwcHDEGeg5fr/f/XgJHzMBAIzVmKJpjFF1dbW2bt2qN998U8XFxZ97m+7ubh0/flxFRUWSpLKyMmVlZamhocGdiUQiamtr09y5cyVJc+bMUSwW0/79+92Zffv2KRaLpcy0tbUpEom4Mzt27JDf71dZWdlYDgsAACseY4yxHX7qqaf07//+7/r973+vmTNnutsdx1F2drZ6e3u1bt06PfLIIyoqKtL777+vp59+Wu3t7frrX//qftPCD3/4Q73++ut65ZVXFAgEtGbNGnV3d6ulpUUTJkyQdPa10Q8//FAbN26UJD355JOaPn26/vM//1PS2Y+cfP3rX1dhYaGef/55ffzxx3r88cdVVVWlX/7yl1bHE4/H5TiOYrEYZ50AkMGsezCWt+RKGvXym9/8xhhjzCeffGIqKirMjTfeaLKyssy0adPMsmXLTHt7e8r99Pf3m+rqahMIBEx2draprKwcMdPd3W2WLl1qcnNzTW5urlm6
dKmJRqMpMx988IFZsGCByc7ONoFAwFRXV5uBgQHr4+EjJwAAY+x7MKYzzS8azjSR6Xp7exUOh/U///M/uummm/Tb3/5WX/7yl9O9LOCqs+1BRn/3LJDJ7rzzTjU3N7vXDx48qNzcXN1xxx0p7ycA8Cl+hBrIQBcG83zNzc268847r/KKgOsD0QQyTG9v70WDeU5zc7N6e3uv0oqA6wfRBDLMY489Nq5zQCYhmkCG+ctf/uL+feG3Z51//fw5AGcRTSDDnDlzxv27oKBAmzZtUiQS0aZNm1K+8/n8OQBn8e5ZIMN85StfcX/0/ejRo+5HTP7hH/5B3/ve99wvIfnKV76StjUC1yrONIEMMzg46P6dm5urxx57TAcOHNBjjz3mBvPCOQBncaYJZJgpU6bowIED7vXNmzdr8+bNo84BSMWZJpBh7r77bvdvrzf1PwHnXz9/DsBZfI0eX6OHDJNMJpWdna3h4eGLzni9XvX398vn813FlQHpY9sDzjSBDOPz+VRTUyPp4meaNTU1BBMYBa9pAhnoueeekyS9+OKLKdu9Xq9qamrc/QBS8fQsT88igyWTSb300kvur5w89dRTnGEiI9n2gGgSTQDIeLymCQDAOCOaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaAABYIpoAAFgimgAAWBpTNGtra3XHHXcoNzdXBQUFqqqq0nvvvZcyY4zRunXrFAqFlJ2drXvvvVfvvvtuykwikdCKFSuUn5+vnJwcLVq0SCdOnEiZiUajCofDchxHjuMoHA6rp6cnZaa9vV0LFy5UTk6O8vPztXLlSiWTybEcEgAA1sYUzV27dulHP/qR9u7dq4aGBp05c0YVFRXq6+tzZ5577jm9+OKL2rBhg5qbmxUMBvXggw/q9OnT7syqVau0bds21dXVqbGxUb29vaqsrNTQ0JA7s2TJErW2tqq+vl719fVqbW1VOBx29w8NDWnBggXq6+tTY2Oj6urq9Oqrr6qmpuZy/nkAAHBx5jJ0dXUZSWbXrl3GGGOGh4dNMBg0zz77rDszMDBgHMcxL7/8sjHGmJ6eHpOVlWXq6urcmY6ODuP1ek19fb0xxphDhw4ZSWbv3r3uTFNTk5FkDh8+bIwxZvv27cbr9ZqOjg53ZsuWLcbv95tYLGa1/lgsZiRZzwMAvphse3BZr2nGYjFJUiAQkCQdO3ZMnZ2dqqiocGf8fr/uuece7dmzR5LU0tKiwcHBlJlQKKSSkhJ3pqmpSY7jqLy83J2ZPXu2HMdJmSkpKVEoFHJnHnroISUSCbW0tIy63kQioXg8nnIBAMDWJUfTGKPVq1frm9/8pkpKSiRJnZ2dkqTCwsKU2cLCQndfZ2enfD6f8vLyPnOmoKBgxGMWFBSkzFz4OHl5efL5fO7MhWpra93XSB3H0dSpU8d62ACADHbJ0ayurtY777yjLVu2jNjn8XhSrhtjRmy70IUzo81fysz51q5dq1gs5l6OHz/+mWsCAOB8lxTNFStW6A9/+IPeeustTZkyxd0eDAYlacSZXldXl3tWGAwGlUwmFY1GP3Pm5MmTIx731KlTKTMXPk40GtXg4OCIM9Bz/H6/Jk+enHIBAMDWmKJpjFF1dbW2bt2qN998U8XFxSn7i4uLFQwG1dDQ4G5LJpPatWuX5s6dK0kqKytTVlZWykwkElFbW5s7M2fOHMViMe3fv9+d2bdvn2KxWMpMW1ubIpGIO7Njxw75/X6VlZWN5bAAALAzlncX
/fCHPzSO45idO3eaSCTiXj755BN35tlnnzWO45itW7eagwcPmu9///umqKjIxONxd+YHP/iBmTJlivnTn/5kDhw4YO6//34za9Ysc+bMGXdm3rx55u///u9NU1OTaWpqMqWlpaaystLdf+bMGVNSUmK+9a1vmQMHDpg//elPZsqUKaa6utr6eHj3LADAGPsejCmakka9/OY3v3FnhoeHzTPPPGOCwaDx+/3m7rvvNgcPHky5n/7+flNdXW0CgYDJzs42lZWVpr29PWWmu7vbLF261OTm5prc3FyzdOlSE41GU2Y++OADs2DBApOdnW0CgYCprq42AwMD1sdDNAEAxtj3wGOMMek6y023eDwux3EUi8V4fRMAMphtD/juWQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALBFNAAAsEU0AACwRTQAALI05mm+//bYWLlyoUCgkj8ej1157LWX/448/Lo/Hk3KZPXt2ykwikdCKFSuUn5+vnJwcLVq0SCdOnEiZiUajCofDchxHjuMoHA6rp6cnZaa9vV0LFy5UTk6O8vPztXLlSiWTybEeEgAAVsYczb6+Ps2aNUsbNmy46My8efMUiUTcy/bt21P2r1q1Stu2bVNdXZ0aGxvV29uryspKDQ0NuTNLlixRa2ur6uvrVV9fr9bWVoXDYXf/0NCQFixYoL6+PjU2Nqqurk6vvvqqampqxnpIAADYMZdBktm2bVvKtmXLlpnFixdf9DY9PT0mKyvL1NXVuds6OjqM1+s19fX1xhhjDh06ZCSZvXv3ujNNTU1Gkjl8+LAxxpjt27cbr9drOjo63JktW7YYv99vYrGY1fpjsZiRZD0PAPhisu3BFXlNc+fOnSooKNAtt9yi5cuXq6ury93X0tKiwcFBVVRUuNtCoZBKSkq0Z88eSVJTU5Mcx1F5ebk7M3v2bDmOkzJTUlKiUCjkzjz00ENKJBJqaWkZdV2JRELxeDzlAgCArXGP5vz587V582a9+eabeuGFF9Tc3Kz7779fiURCktTZ2Smfz6e8vLyU2xUWFqqzs9OdKSgoGHHfBQUFKTOFhYUp+/Py8uTz+dyZC9XW1rqvkTqOo6lTp1728QIAMsfE8b7DRx991P27pKREt99+u6ZPn6433nhDDz/88EVvZ4yRx+Nxr5//9+XMnG/t2rVavXq1ez0ejxNOAIC1K/6Rk6KiIk2fPl1HjhyRJAWDQSWTSUWj0ZS5rq4u98wxGAzq5MmTI+7r1KlTKTMXnlFGo1ENDg6OOAM9x+/3a/LkySkXAABsXfFodnd36/jx4yoqKpIklZWVKSsrSw0NDe5MJBJRW1ub5s6dK0maM2eOYrGY9u/f787s27dPsVgsZaatrU2RSMSd2bFjh/x+v8rKyq70YQEAMtCYn57t7e3V0aNH3evHjh1Ta2urAoGAAoGA1q1bp0ceeURFRUV6//339fTTTys/P1/f/va3JUmO4+iJJ55QTU2NbrjhBgUCAa1Zs0alpaV64IEHJEm33nqr5s2bp+XLl2vjxo2SpCeffFKVlZWaOXOmJKmiokJf+9rXFA6H9fzzz+vjjz/WmjVrtHz5cs4gAQBXxljflvvWW28ZSSMuy5YtM5988ompqKgwN954o8nKyjLTpk0zy5YtM+3t
7Sn30d/fb6qrq00gEDDZ2dmmsrJyxEx3d7dZunSpyc3NNbm5uWbp0qUmGo2mzHzwwQdmwYIFJjs72wQCAVNdXW0GBgasj4WPnAAAjLHvgccYY9LY7LSKx+NyHEexWIyzUwDIYLY94LtnAQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALA05mi+/fbbWrhwoUKhkDwej1577bWU/cYYrVu3TqFQSNnZ2br33nv17rvvpswkEgmtWLFC+fn5ysnJ0aJFi3TixImUmWg0qnA4LMdx5DiOwuGwenp6Umba29u1cOFC5eTkKD8/XytXrlQymRzrIQEAYGXM0ezr69OsWbO0YcOGUfc/99xzevHFF7VhwwY1NzcrGAzqwQcf1OnTp92ZVatWadu2baqrq1NjY6N6e3tVWVmpoaEhd2bJkiVqbW1VfX296uvr1draqnA47O4fGhrSggUL1NfXp8bGRtXV1enVV19VTU3NWA8JAAA75jJIMtu2bXOvDw8Pm2AwaJ599ll328DAgHEcx7z88svGGGN6enpMVlaWqaurc2c6OjqM1+s19fX1xhhjDh06ZCSZvXv3ujNNTU1Gkjl8+LAxxpjt27cbr9drOjo63JktW7YYv99vYrHYqOsdGBgwsVjMvRw/ftxIuug8ACAzxGIxqx6M62uax44dU2dnpyoqKtxtfr9f99xzj/bs2SNJamlp0eDgYMpMKBRSSUmJO9PU1CTHcVReXu7OzJ49W47jpMyUlJQoFAq5Mw899JASiYRaWlpGXV9tba37dK/jOJo6der4HTwA4AtvXKPZ2dkpSSosLEzZXlhY6O7r7OyUz+dTXl7eZ84UFBSMuP+CgoKUmQsfJy8vTz6fz5250Nq1axWLxdzL8ePHL+EoAQCZauKVuFOPx5Ny3RgzYtuFLpwZbf5SZs7n9/vl9/s/cx0AAFzMuJ5pBoNBSRpxptfV1eWeFQaDQSWTSUWj0c+cOXny5Ij7P3XqVMrMhY8TjUY1ODg44gwUAIDxMK7RLC4uVjAYVENDg7stmUxq165dmjt3riSprKxMWVlZKTORSERtbW3uzJw5cxSLxbR//353Zt++fYrFYikzbW1tikQi7syOHTvk9/tVVlY2nocFAICkS3h6tre3V0ePHnWvHzt2TK2trQoEApo2bZpWrVql9evXa8aMGZoxY4bWr1+vSZMmacmSJZIkx3H0xBNPqKamRjfccIMCgYDWrFmj0tJSPfDAA5KkW2+9VfPmzdPy5cu1ceNGSdKTTz6pyspKzZw5U5JUUVGhr33tawqHw3r++ef18ccfa82aNVq+fLkmT5582f9gAAAYYaxvy33rrbeMpBGXZcuWGWPOfuzkmWeeMcFg0Pj9fnP33XebgwcPptxHf3+/qa6uNoFAwGRnZ5vKykrT3t6eMtPd3W2WLl1qcnNzTW5urlm6dKmJRqMpMx988IFZsGCByc7ONoFAwFRXV5uBgQHrY7F9izEA4IvNtgceY4xJY7PTKh6Py3EcxWIxzk4BIIPZ9oDvngUAwBLRBADAEtEEAMAS0QQAwBLRBADAEtEEAMAS0QQAwBLRBADAEtEEAMAS0QQAwBLRBADAEtEEAMAS0QQAwBLRBADAEtEEAMAS
0QQAwNLEdC8AQPoMDQ1p9+7dikQiKioq0l133aUJEyake1nANYszTSBDbd26VTfffLPuu+8+LVmyRPfdd59uvvlmbd26Nd1LA65ZRBPIQFu3btV3vvMdlZaWqqmpSadPn1ZTU5NKS0v1ne98h3ACF+Exxph0LyJd4vG4HMdRLBbT5MmT070c4KoYGhrSzTffrNLSUr322mvyej/9f+fh4WFVVVWpra1NR44c4alaZAzbHnCmCWSY3bt36/3339fTTz+dEkxJ8nq9Wrt2rY4dO6bdu3enaYXAtYtoAhkmEolIkkpKSkbdf277uTkAnyKaQIYpKiqSJLW1tY26/9z2c3MAPkU0gQxz11136W//9m+1fv16DQ8Pp+wbHh5WbW2tiouLddddd6VphcC1i2gCGWbChAl64YUX9Prrr6uqqirl3bNVVVV6/fXX9bOf/Yw3AQGj4MsNgAz08MMP63e/+51qamo0d+5cd3txcbF+97vf6eGHH07j6oBrFx854SMnyGB8IxBwlm0PONMEMtiECRN07733pnsZwHWD1zQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwRDQBALBENAEAsEQ0AQCwNO7RXLdunTweT8olGAy6+40xWrdunUKhkLKzs3Xvvffq3XffTbmPRCKhFStWKD8/Xzk5OVq0aJFOnDiRMhONRhUOh+U4jhzHUTgcVk9Pz3gfDgAAritypvl3f/d3ikQi7uXgwYPuvueee04vvviiNmzYoObmZgWDQT344IM6ffq0O7Nq1Spt27ZNdXV1amxsVG9vryorKzU0NOTOLFmyRK2traqvr1d9fb1aW1sVDoevxOEAAHCWGWfPPPOMmTVr1qj7hoeHTTAYNM8++6y7bWBgwDiOY15++WVjjDE9PT0mKyvL1NXVuTMdHR3G6/Wa+vp6Y4wxhw4dMpLM3r173ZmmpiYjyRw+fNh6rbFYzEgysVhsLIcIAPiCse3BFTnTPHLkiEKhkIqLi/W9731P//u//ytJOnbsmDo7O1VRUeHO+v1+3XPPPdqzZ48kqaWlRYODgykzoVBIJSUl7kxTU5Mcx1F5ebk7M3v2bDmO486MJpFIKB6Pp1wAALA17tEsLy/Xv/7rv+qPf/yjNm3apM7OTs2dO1fd3d3q7OyUJBUWFqbcprCw0N3X2dkpn8+nvLy8z5wpKCgY8dgFBQXuzGhqa2vd10Adx9HUqVMv61gBAJll3KM5f/58PfLIIyotLdUDDzygN954Q5L0L//yL+6Mx+NJuY0xZsS2C104M9r8593P2rVrFYvF3Mvx48etjgkAAOkqfOQkJydHpaWlOnLkiPsu2gvPBru6utyzz2AwqGQyqWg0+pkzJ0+eHPFYp06dGnEWez6/36/JkyenXAAAsHXFo5lIJPTXv/5VRUVFKi4uVjAYVENDg7s/mUxq165dmjt3riSprKxMWVlZKTORSERtbW3uzJw5cxSLxbR//353Zt++fYrFYu4MAADjbeJ43+GaNWu0cOFCTZs2TV1dXfrpT3+qeDyuZcuWyePxaNWqVVq/fr1mzJihGTNmaP369Zo0aZKWLFkiSXIcR0888YRqamp0ww03KBAIaM2aNe7TvZJ06623at68eVq+fLk2btwoSXryySdVWVmpmTNnjvchAQAg6QpE88SJE/r+97+vjz76SDfeeKNmz56tvXv3avr06ZKkf/zHf1R/f7+eeuopRaNRlZeXa8eOHcrNzXXv4+c//7kmTpyo7373u+rv79e3vvUtvfLKK5owYYI7s3nzZq1cudJ9l+2iRYu0YcOG8T4cAABcHmOMSfci0iUej8txHMViMV7f
BIAMZtsDvnsWAABLRBMAAEtEEwAAS0QTAABLRBMAAEtEEwAAS0QTAABLRBMAAEtEEwAAS0QTAABLRBMAAEtEEwAAS0QTAABLRBMAAEtEEwAAS0QTAABLRBMAAEtEEwAAS0QTAABLRBMAAEtEEwAAS0QTAABLRBMAAEtEEwAAS0QTAABLRBMAAEtEEwAAS0QTAABLRBMAAEtEEwAAS0QTAABLRBMAAEtEEwAAS0QTAABLRBMAAEtEEwAAS0QTAABLRBMAAEtEEwAAS0QTyGBHjx6Vz+eTx+ORz+fT0aNH070k4Jo2Md0LAJAeXq9Xxhj3+uDgoGbMmCGPx6Ph4eE0rgy4dnGmCWSgC4N5PmOMvF7+0wCMhn8zgAxz9OhRN5j5+fnatGmTIpGINm3apPz8fElnw8lTtcBIHnOx/93MAPF4XI7jKBaLafLkyeleDnBV+Hw+DQ4OSjr7lOzEiZ++SnPmzBllZWVJkrKyspRMJtOyRuBqs+0BZ5pAhjkXzEceeSQlmJI0ceJELVq0KGUOwKeIJpBhzr1euX379lH3NzQ0pMwB+BT/VgAZ5p/+6Z8kSf39/ero6NDOnTu1ZcsW7dy5Ux0dHerv70+ZA/ApXtPkNU1kmGQyKb/f/7lziURCPp/vKqwISD9e0wQwKp/Pp8WLF3/mzOLFiwkmMAqiCWSYoaEh/eUvf9FNN90kj8eTss/j8eimm27SO++8o6GhoTStELh2EU0gw+zevVvvv/++/u3f/k3xeFxVVVUqLS1VVVWV4vG4fvvb3+rYsWPavXt3upcKXHP4Gj0gw0QiEUlSXV2dNmzY4J5RHjx4UH/zN3+j6urqlDkAn+JME8gwRUVFkqRf/OIXI75KzxijX/ziFylzAD7FmSaQYcrLy92/H3jgAXV2dqq7u1s33HCDgsGgduzYMWIOwFlEE8gwL730kvv3uUBKUkdHh955552UuZqamqu6NuBax9OzQIZpbGwc1zkgkxBNIMPYfv6Sz2kCIxFNIMNc7DtnL3UOyCREE8gwvb294zoHZBKiCQCAJaIJAICl6z6aL730koqLi/WlL31JZWVlfPUXAOCKua4/p/kf//EfWrVqlV566SV94xvf0MaNGzV//nwdOnRI06ZNS/fygDEbGhrSwMDAmG/3ySef6L//+7/HfT1//vOfP3fmlltu0aRJk8Z835MmTRrxhfHAte66/j3N8vJy3XbbbfrVr37lbrv11ltVVVWl2traz709v6eJa82lRiR7ovTV/PQ8cXT4o2H1nxn77W655Ra99957478g4BLY9uC6PdNMJpNqaWkZ8evyFRUV2rNnz6i3SSQSSiQS7vV4PH5F1whcLV/N9+rA//1yWh77to29+q/O4THf7rvf/e4VWA1wZV230fzoo480NDSkwsLClO2FhYXq7Owc9Ta1tbX68Y9/fDWWB1ySS33i57/279Fti+8a59XY+X+/363/c+fctDw2cLVdt9E858Kns4wxF32Ka+3atVq9erV7PR6Pa+rUqVd0fcDVMLPk6/rnN5qtZsvKyqzvt6Wl5fMf+6tftb4/4Hp33UYzPz9fEyZMGHFW2dXVNeLs8xy/3y+/3381lgdcVZMmTdJtt9027vd7Je4TuJ5dtx858fl8KisrU0NDQ8r2hoYGzZ3LU0XAxdg+BXwdv0cQuGKu2zNNSVq9erXC4bBuv/12zZkzR7/+9a/V3t6uH/zgB+leGnBN+6yXMc7tBzDSdR3NRx99VN3d3frJT36iSCSikpISbd++XdOnT0/30oBr3sXCSTCBi7uuP6d5uficJgBAsu/BdfuaJgAAVxvRBADAEtEEAMAS0QQAwBLRBADAEtEEAMAS0QQAwBLRBADAEtEEAMDSdf01epfr3Jch8WPUAJDZznXg874kL6Ojefr0aUniNzUBAJLOdsFxnIvuz+jvnh0eHtaHH36o3Nzcz/zFB+CL7NyPsR8/fpzvYEbGMsbo9OnTCoVC8nov/splRkcTAD9c
AIwFbwQCAMAS0QQAwBLRBDKc3+/XM888I7/fn+6lANc8XtMEAMASZ5oAAFgimgAAWCKaAABYIpoAAFgimgAAWCKaQIZ6++23tXDhQoVCIXk8Hr322mvpXhJwzSOaQIbq6+vTrFmztGHDhnQvBbhuZPSvnACZbP78+Zo/f366lwFcVzjTBADAEtEEAMAS0QQAwBLRBADAEtEEAMAS754FMlRvb6+OHj3qXj927JhaW1sVCAQ0bdq0NK4MuHbx02BAhtq5c6fuu+++EduXLVumV1555eovCLgOEE0AACzxmiYAAJaIJgAAlogmAACWiCYAAJaIJgAAlogmAACWiCYAAJaIJgAAlogmAACWiCYAAJaIJgAAlv4/aqxirAW0DI8AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 500x500 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of user3['age_month'], used to inspect the outliers of that column.\n",
    "# Author's observation: age_month in the user_info table has outliers, and so do\n",
    "# login_day and login_diff_time in the login_day table.\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Explicit figure/axes pair rather than the implicit pyplot state machine.\n",
    "fig, ax = plt.subplots(figsize=(5, 5), dpi=100)\n",
    "p = ax.boxplot(user3['age_month'].values, notch=True)\n",
    "ax.set(title='age_month distribution', ylabel='age_month')\n",
    "\n",
    "# Key step: the flier markers of the single box hold the points drawn as outliers.\n",
    "outlier = p[\"fliers\"][0].get_ydata()\n",
    "\n",
    "print(f\"一共有{len(outlier)}个异常数据\")\n",
    "# max()/min() raise ValueError on an empty array, so the extremes are reported only\n",
    "# when the plot really contains outliers (e.g. not when the column is already clean).\n",
    "if len(outlier) > 0:\n",
    "    print(f\"异常值的最大值为{outlier.max()}\")\n",
    "    print(f\"异常值的最小值为{outlier.min()}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "id": "b2c69cb1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>first_order_time</th>\n",
       "      <th>first_order_price</th>\n",
       "      <th>age_month</th>\n",
       "      <th>city_num</th>\n",
       "      <th>platform_num</th>\n",
       "      <th>model_num</th>\n",
       "      <th>app_num</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2000001555945280</td>\n",
       "      <td>2018/12/23 11:44</td>\n",
       "      <td>0.00</td>\n",
       "      <td>32</td>\n",
       "      <td>广州</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>11.2707</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2000001556645228</td>\n",
       "      <td>2019/1/11 9:46</td>\n",
       "      <td>0.00</td>\n",
       "      <td>63</td>\n",
       "      <td>徐州</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>4.9689</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2000001558047804</td>\n",
       "      <td>2018/12/26 11:04</td>\n",
       "      <td>0.00</td>\n",
       "      <td>63</td>\n",
       "      <td>重庆</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>6.6392</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2000001558146467</td>\n",
       "      <td>2018/12/31 8:47</td>\n",
       "      <td>0.00</td>\n",
       "      <td>63</td>\n",
       "      <td>重庆</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>12.2222</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2000001558146878</td>\n",
       "      <td>2019/1/28 1:53</td>\n",
       "      <td>0.00</td>\n",
       "      <td>47</td>\n",
       "      <td>重庆</td>\n",
       "      <td>13.5570</td>\n",
       "      <td>10.3925</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135963</th>\n",
       "      <td>2000002945827404</td>\n",
       "      <td>2019/5/13 20:20</td>\n",
       "      <td>0.00</td>\n",
       "      <td>63</td>\n",
       "      <td>徐州</td>\n",
       "      <td>13.5570</td>\n",
       "      <td>10.8966</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135964</th>\n",
       "      <td>2000002945862051</td>\n",
       "      <td>2019/5/13 21:35</td>\n",
       "      <td>0.00</td>\n",
       "      <td>63</td>\n",
       "      <td>保定</td>\n",
       "      <td>13.5570</td>\n",
       "      <td>8.1782</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135965</th>\n",
       "      <td>2000002945866461</td>\n",
       "      <td>2019/5/13 21:46</td>\n",
       "      <td>0.00</td>\n",
       "      <td>63</td>\n",
       "      <td>西安</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>6.5617</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135966</th>\n",
       "      <td>2000002945873156</td>\n",
       "      <td>2019/5/13 22:10</td>\n",
       "      <td>0.00</td>\n",
       "      <td>24</td>\n",
       "      <td>泉州</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>15.3061</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135967</th>\n",
       "      <td>2000002946803184</td>\n",
       "      <td>2019/5/13 12:45</td>\n",
       "      <td>0.01</td>\n",
       "      <td>45</td>\n",
       "      <td>error</td>\n",
       "      <td>9.2969</td>\n",
       "      <td>8.8308</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>135968 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 user_id  first_order_time  first_order_price  age_month  \\\n",
       "0       2000001555945280  2018/12/23 11:44               0.00         32   \n",
       "1       2000001556645228    2019/1/11 9:46               0.00         63   \n",
       "2       2000001558047804  2018/12/26 11:04               0.00         63   \n",
       "3       2000001558146467   2018/12/31 8:47               0.00         63   \n",
       "4       2000001558146878    2019/1/28 1:53               0.00         47   \n",
       "...                  ...               ...                ...        ...   \n",
       "135963  2000002945827404   2019/5/13 20:20               0.00         63   \n",
       "135964  2000002945862051   2019/5/13 21:35               0.00         63   \n",
       "135965  2000002945866461   2019/5/13 21:46               0.00         63   \n",
       "135966  2000002945873156   2019/5/13 22:10               0.00         24   \n",
       "135967  2000002946803184   2019/5/13 12:45               0.01         45   \n",
       "\n",
       "       city_num  platform_num  model_num  app_num  \n",
       "0            广州        9.2969    11.2707        1  \n",
       "1            徐州        9.2969     4.9689        1  \n",
       "2            重庆        9.2969     6.6392        1  \n",
       "3            重庆        9.2969    12.2222        1  \n",
       "4            重庆       13.5570    10.3925        1  \n",
       "...         ...           ...        ...      ...  \n",
       "135963       徐州       13.5570    10.8966        1  \n",
       "135964       保定       13.5570     8.1782        1  \n",
       "135965       西安        9.2969     6.5617        1  \n",
       "135966       泉州        9.2969    15.3061        1  \n",
       "135967    error        9.2969     8.8308        1  \n",
       "\n",
       "[135968 rows x 8 columns]"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "# Replace out-of-range age_month values (below 20 or above 55) with the median.\n",
    "# The median is taken over the in-range values only: a median computed on the raw\n",
    "# column can itself fall outside [20, 55], in which case the outliers would merely be\n",
    "# swapped for another out-of-range value.\n",
    "# NOTE(review): the original comment mentioned a lower bound of 22 (and years), while\n",
    "# the code uses 20 and the column name suggests months - confirm the intended bounds.\n",
    "age_in_range = user3['age_month'].between(20, 55)\n",
    "assert age_in_range.any(), \"no age_month value lies inside [20, 55]\"\n",
    "median_age = np.median(user3.loc[age_in_range, 'age_month'])\n",
    "\n",
    "# Both bounds are handled in one pass; rows with a missing age_month match neither\n",
    "# comparison and are left untouched. user3 is modified in place.\n",
    "age_out_of_range = (user3['age_month'] < 20) | (user3['age_month'] > 55)\n",
    "user3.loc[age_out_of_range, 'age_month'] = median_age\n",
    "\n",
    "user3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "id": "40bdab08",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>login_day</th>\n",
       "      <th>login_diff_time</th>\n",
       "      <th>distance_day</th>\n",
       "      <th>login_time</th>\n",
       "      <th>launch_time</th>\n",
       "      <th>chinese_subscribe_num</th>\n",
       "      <th>math_subscribe_num</th>\n",
       "      <th>add_friend</th>\n",
       "      <th>add_group</th>\n",
       "      <th>camp_num</th>\n",
       "      <th>learn_num</th>\n",
       "      <th>finish_num</th>\n",
       "      <th>study_num</th>\n",
       "      <th>coupon</th>\n",
       "      <th>course_order_num</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2000001555945280</td>\n",
       "      <td>7</td>\n",
       "      <td>6.86</td>\n",
       "      <td>131</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2000001556645228</td>\n",
       "      <td>4</td>\n",
       "      <td>1.00</td>\n",
       "      <td>81</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2000001558047804</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>179</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2000001558146467</td>\n",
       "      <td>6</td>\n",
       "      <td>1.00</td>\n",
       "      <td>32</td>\n",
       "      <td>24</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2000001558146878</td>\n",
       "      <td>4</td>\n",
       "      <td>1.75</td>\n",
       "      <td>361</td>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135612</th>\n",
       "      <td>2000002947317726</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135613</th>\n",
       "      <td>2000002947317758</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135614</th>\n",
       "      <td>2000002947317827</td>\n",
       "      <td>4</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135615</th>\n",
       "      <td>2000002947317941</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>393</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135616</th>\n",
       "      <td>2000002948014779</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>135617 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 user_id  login_day  login_diff_time  distance_day  \\\n",
       "0       2000001555945280          7             6.86           131   \n",
       "1       2000001556645228          4             1.00            81   \n",
       "2       2000001558047804          1             0.00           179   \n",
       "3       2000001558146467          6             1.00            32   \n",
       "4       2000001558146878          4             1.75           361   \n",
       "...                  ...        ...              ...           ...   \n",
       "135612  2000002947317726          1             0.00             0   \n",
       "135613  2000002947317758          1             0.00             0   \n",
       "135614  2000002947317827          4            -1.00            -1   \n",
       "135615  2000002947317941          1             0.00             0   \n",
       "135616  2000002948014779          1             0.00             0   \n",
       "\n",
       "        login_time  launch_time  chinese_subscribe_num  math_subscribe_num  \\\n",
       "0                1            1                      1                   0   \n",
       "1                3            1                      1                   1   \n",
       "2                3            0                      1                   0   \n",
       "3               24            3                      0                   0   \n",
       "4               39            0                      0                   1   \n",
       "...            ...          ...                    ...                 ...   \n",
       "135612           2            0                      0                   0   \n",
       "135613           2            0                      0                   0   \n",
       "135614           0            0                      0                   0   \n",
       "135615         393            0                      0                   0   \n",
       "135616           4            0                      0                   0   \n",
       "\n",
       "        add_friend  add_group  camp_num  learn_num  finish_num  study_num  \\\n",
       "0                1          1         0          0           0          0   \n",
       "1                1          1         2          1           0          0   \n",
       "2                1          1         2          0           0          0   \n",
       "3                1          1         1          5           5          0   \n",
       "4                1          1         2          0           0          1   \n",
       "...            ...        ...       ...        ...         ...        ...   \n",
       "135612           1          1         1          0           0          0   \n",
       "135613           1          1         1          0           0          0   \n",
       "135614           1          1         1          0           0          0   \n",
       "135615           1          1         1          0           0          0   \n",
       "135616           1          1         2          0           0          0   \n",
       "\n",
       "        coupon  course_order_num  \n",
       "0            0                 4  \n",
       "1            0                 0  \n",
       "2            0                 0  \n",
       "3            0                 1  \n",
       "4            0                 0  \n",
       "...        ...               ...  \n",
       "135612       0                 0  \n",
       "135613       0                 0  \n",
       "135614       0                 0  \n",
       "135615       0                 0  \n",
       "135616       0                 0  \n",
       "\n",
       "[135617 rows x 16 columns]"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Median of login_day over every row of user1; it is the fill value used below.\n",
    "# NOTE(review): the variable is still called median_age although it holds a\n",
    "# login_day median; the name is kept in case other cells read it.\n",
    "login_day_values = user1['login_day']\n",
    "median_age = np.median(login_day_values)\n",
    "\n",
    "# Negative login_day entries are treated as invalid and overwritten in place.\n",
    "user1.loc[login_day_values < 0, 'login_day'] = median_age\n",
    "\n",
    "user1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "id": "e85543ec",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "一共有26475个异常数据\n",
      "异常值的最大值为135.0\n",
      "异常值的最小值为-1.0\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbsAAAGtCAYAAACP5tqYAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAmgklEQVR4nO3df3RU9Z3/8dckIbMJJpGAmcmUINGk0jVRAVvc2CzJInFZQbOIVFHKtv6hB7WbEn7FbnfRc0zKDwFPs9hiexarC7THE2KXryjpLj9iw7YYYC1Y5UcjIGROqBtnAokZmLnfP7K5OCRAEidM7p3n45x7zsy975m880d48bn3fu7HYRiGIQAAbCwu2g0AADDYCDsAgO0RdgAA2yPsAAC2R9gBAGyPsAMA2B5hBwCwPcIOAGB7CdFuYCBCoZBOnz6tlJQUORyOaLcDAIgSwzDU1tYmj8ejuLjLj98sGXanT59WVlZWtNsAAAwRJ0+e1OjRoy97vN9ht3v3bq1cuVKNjY1qbm7Wli1bVFpa2mvtE088ofXr12vNmjUqKysz93d2dmrhwoXatGmTOjo6NGXKFK1bt+6KjX5RSkqKpK5fLjU1tb+/AgDAJvx+v7KyssxcuJx+h925c+d0++236zvf+Y4efPDBy9bV1tbqd7/7nTweT49jZWVl+o//+A9t3rxZI0eOVHl5uaZPn67GxkbFx8dftYfuU5epqamEHQDgqpe0+h1206ZN07Rp065Yc+rUKT399NN65513dN9994Ud8/l8+vnPf67XXntN99xzjyTp9ddfV1ZWln7zm9/o3nvv7W9LAABcUcTvxgyFQpo7d64WLVqkW2+9tcfxxsZGnT9/XiUlJeY+j8ejvLw8NTQ09PqdnZ2d8vv9YRsAAH0V8bBbvny5EhIS9L3vfa/X416vV4mJiRoxYkTYfpfLJa/X2+tnqqqqlJaWZm7cnAIA6I+Ihl1jY6Neeuklbdiwod9TAgzDuOxnKioq5PP5zO3kyZORaBcAECMiGnb19fVqaWnRmDFjlJCQoISEBB0/flzl5eUaO3asJMntdisQCKi1tTXssy0tLXK5XL1+r9PpNG9G4aYUAEB/RTTs5s6dq/fff18HDhwwN4/Ho0WLFumdd96RJE2cOFHDhg1TXV2d+bnm5mYdPHhQBQUFkWwHAABJA7gb8+zZszp69Kj5vqmpSQcOHFB6errGjBmjkSNHhtUPGzZMbrdbt9xyiyQpLS1Njz/+uMrLyzVy5Eilp6dr4cKFys/PN+/OBAAgkvoddu+9956Ki4vN9wsWLJAkzZs3Txs2bOjTd6xZs0YJCQmaPXu2Oal8w4YNfZpjBwBAfzkMwzCi3UR/+f1+paWlyefzcf0OAGJYX/OAVQ8AALZnyQdBA7EuGAyqvr5ezc3NyszMVGFhIZcBgCtgZAdYTE1NjXJyclRcXKw5c+aouLhYOTk5qqmpiXZrwJBF2AEWUlNTo1mzZik/P1979uxRW1ub9uzZo/z8fM2aNYvAAy6DG1QAiwgGg8rJyVF+fr5qa2vDFqoMhUIqLS3VwYMHdeTIEU5pImZwgwpgM/X19fr444/17LPP9liROS4uThUVFWpqalJ9fX2UOgSGLsIOsIjm5mZJUl5eXq/Hu/d31wG4iLADLCIzM1OSdPDgwV6Pd+/vrgNwEWEHWERhYaHGjh2ryspKhUKhsGOhUEhVVVXKzs5WYWFhlDoEhi7CDrCI+Ph4vfjii9q6datKS0vD7sYsLS3V1q1btWrVKm5OAXrBpHLAQmbOnKk33nhD5eXlYauEZGdn64033tDMmTOj2B0wdDH1ALAgnqACdOlrHjCyAywoPj5eRUVF0W4DsAyu2QEAbI+wAwDYHmEHALA9wg4AYHuEHQDA9gg7AIDtEXYAANsj7AAAtkfYAQBsj7ADANgeYQcAsD3CDgBge4QdAMD2CDsAgO0RdgAA2yPsAAC2R9gBAGyPsAMA
2B5hBwCwPcIOAGB7hB0AwPYIOwCA7RF2AADbI+wAALZH2AEAbI+wAwDYHmEHALC9fofd7t27NWPGDHk8HjkcDtXW1prHzp8/ryVLlig/P1/Dhw+Xx+PRt7/9bZ0+fTrsOzo7O/XMM89o1KhRGj58uO6//3598sknX/qXAQCgN/0Ou3Pnzun2229XdXV1j2Pt7e3at2+ffvjDH2rfvn2qqanR4cOHdf/994fVlZWVacuWLdq8ebPeffddnT17VtOnT1cwGBz4bwIAwGU4DMMwBvxhh0NbtmxRaWnpZWv27t2rb3zjGzp+/LjGjBkjn8+nG264Qa+99pq+9a1vSZJOnz6trKwsvfXWW7r33nuv+nP9fr/S0tLk8/mUmpo60PYBABbX1zwY9Gt2Pp9PDodD119/vSSpsbFR58+fV0lJiVnj8XiUl5enhoaGXr+js7NTfr8/bAMAoK8GNew+//xzLV26VHPmzDET1+v1KjExUSNGjAirdblc8nq9vX5PVVWV0tLSzC0rK2sw2wYA2Myghd358+f18MMPKxQKad26dVetNwxDDoej12MVFRXy+XzmdvLkyUi3CwCwsUEJu/Pnz2v27NlqampSXV1d2HlUt9utQCCg1tbWsM+0tLTI5XL1+n1Op1OpqalhGwAAfRXxsOsOuiNHjug3v/mNRo4cGXZ84sSJGjZsmOrq6sx9zc3NOnjwoAoKCiLdDgAASujvB86ePaujR4+a75uamnTgwAGlp6fL4/Fo1qxZ2rdvn7Zu3apgMGheh0tPT1diYqLS0tL0+OOPq7y8XCNHjlR6eroWLlyo/Px83XPPPZH7zQAA+D/9nnqwc+dOFRcX99g/b948LVu2TNnZ2b1+bseOHSoqKpLUdePKokWLtHHjRnV0dGjKlClat25dn288YeoBAEDqex58qXl20ULYAQCkITTPDgCAaCPsAAC2R9gBAGyPsAMA2B5hBwCwPcIOAGB7hB0AwPYIOwCA7RF2AADbI+wAALZH2AEAbI+wAwDYHmEHALA9wg4AYHuEHQDA9gg7AIDtEXYAANsj7AAAtkfYAQBsj7ADANgeYQcAsD3CDgBge4QdAMD2CDsAgO0RdgAA2yPsAAC2R9gBAGyPsAMA2B5hBwCwPcIOAGB7hB0AwPYIOwCA7RF2AADbI+wAALZH2AEAbI+wAwDYHmEHALA9wg4AYHuEHQDA9gg7AIDtEXYAANvrd9jt3r1bM2bMkMfjkcPhUG1tbdhxwzC0bNkyeTweJSUlqaioSIcOHQqr6ezs1DPPPKNRo0Zp+PDhuv/++/XJJ598qV8EAIDL6XfYnTt3Trfffruqq6t7Pb5ixQqtXr1a1dXV2rt3r9xut6ZOnaq2tjazpqysTFu2bNHmzZv17rvv6uzZs5o+fbqCweDAfxMAAC7H+BIkGVu2bDHfh0Ihw+12Gz/60Y/MfZ9//rmRlpZm/OQnPzEMwzA+++wzY9iwYcbmzZvNmlOnThlxcXHG22+/3aef6/P5DEmGz+f7Mu0DACyur3kQ0Wt2TU1N8nq9KikpMfc5nU5NnjxZDQ0NkqTGxkadP38+rMbj8SgvL8+suVRnZ6f8fn/YBgBAX0U07LxeryTJ5XKF7Xe5XOYxr9erxMREjRgx4rI1l6qqqlJaWpq5ZWVlRbJtAIDNDcrdmA6HI+y9YRg99l3qSjUVFRXy+XzmdvLkyYj1CgCwv4iGndvtlqQeI7SWlhZztOd2uxUIBNTa2nrZmks5nU6lpqaGbQAA9FVEwy47O1tut1t1dXXmvkAgoF27dqmgoECSNHHiRA0bNiysprm5WQcPHjRrAACIpIT+fuDs2bM6evSo+b6pqUkHDhxQenq6xowZo7KyMlVWVio3N1e5ubmqrKxUcnKy5syZI0lKS0vT448/rvLyco0cOVLp6elauHCh8vPzdc8990TuNwMA4P/0O+zee+89FRcXm+8XLFggSZo3b542bNigxYsXq6OjQ/Pnz1dra6smTZqk
7du3KyUlxfzMmjVrlJCQoNmzZ6ujo0NTpkzRhg0bFB8fH4FfCQCAcA7DMIxoN9Fffr9faWlp8vl8XL8DgBjW1zzg2ZgAANsj7AAAtkfYAQBsj7ADANgeYQcAsD3CDgBge4QdAMD2CDsAgO0RdgAA2yPsAAC2R9gBAGyPsAMA2B5hBwCwPcIOAGB7hB0AwPYIOwCA7RF2AADbI+wAALZH2AEAbI+wAwDYHmEHALA9wg4AYHuEHQDA9gg7AIDtEXYAANsj7AAAtpcQ7QYA9F8wGFR9fb2am5uVmZmpwsJCxcfHR7stYMhiZAdYTE1NjXJyclRcXKw5c+aouLhYOTk5qqmpiXZrwJBF2AEWUlNTo1mzZik/P1979uxRW1ub9uzZo/z8fM2aNYvAAy7DYRiGEe0m+svv9ystLU0+n0+pqanRbge4JoLBoHJycpSfn6/a2lrFxV38v2ooFFJpaakOHjyoI0eOcEoTMaOvecDIDrCI+vp6ffzxx3r22WfDgk6S4uLiVFFRoaamJtXX10epQ2DoIuwAi2hubpYk5eXl9Xq8e393HYCLCDvAIjIzMyVJBw8e7PV49/7uOgAXEXaARRQWFmrs2LGqrKxUKBQKOxYKhVRVVaXs7GwVFhZGqUNg6CLsAIuIj4/Xiy++qK1bt6q0tDTsbszS0lJt3bpVq1at4uYUoBdMKgcsZObMmXrjjTdUXl6ugoICc392drbeeOMNzZw5M4rdAUMXUw8AC+IJKkCXvuYBIzvAguLj41VUVBTtNgDL4JodAMD2CDsAgO1FPOwuXLigf/qnf1J2draSkpJ000036fnnnw+7VdowDC1btkwej0dJSUkqKirSoUOHIt0KAACSBiHsli9frp/85Ceqrq7WH//4R61YsUIrV67Uj3/8Y7NmxYoVWr16taqrq7V371653W5NnTpVbW1tkW4HAIDIh92ePXv0wAMP6L777tPYsWM1a9YslZSU6L333pPUNapbu3atfvCDH2jmzJnKy8vTq6++qvb2dm3cuDHS7QAAEPmw++Y3v6n//M//1OHDhyVJ//M//6N3331Xf/d3fydJampqktfrVUlJifkZp9OpyZMnq6Ghodfv7OzslN/vD9sAAOiriE89WLJkiXw+n8aNG6f4+HgFg0G98MILeuSRRyRJXq9XkuRyucI+53K5dPz48V6/s6qqSs8991ykWwUsi3l2QP9EfGT3y1/+Uq+//ro2btyoffv26dVXX9WqVav06quvhtU5HI6w94Zh9NjXraKiQj6fz9xOnjwZ6bYBy2ClcqD/Ih52ixYt0tKlS/Xwww8rPz9fc+fO1fe//31VVVVJktxut6SLI7xuLS0tPUZ73ZxOp1JTU8M2IBaxUjkwMBEPu/b29h4LS8bHx5tTD7Kzs+V2u1VXV2ceDwQC2rVrV9iz/gCECwaDKi8v1/Tp01VbW6u77rpL1113ne666y7V1tZq+vTpWrhwoYLBYLRbBYaciF+zmzFjhl544QWNGTNGt956q/bv36/Vq1fru9/9rqSu05dlZWWqrKxUbm6ucnNzVVlZqeTkZM2ZMyfS7QC20b1S+aZNmy67UnlBQYHq6+t5lBhwiYiH3Y9//GP98Ic/1Pz589XS0iKPx6MnnnhC//zP/2zWLF68WB0dHZo/f75aW1s1adIkbd++XSkpKZFuB7ANVioHBo5VDwCL2Llzp4qLi7Vnzx7dddddPY7v2bNHBQUF2rFjByM7xIy+5gHPxgQsgpXKgYEj7ACLYKVyYOBYzw6wEFYqBwaGkR1gQZdear/0tCaAcIQdYCHdk8pvu+22sNOYt912G5PKgSvgbkzAIoLBoHJycpSfn6/a2tqwuXahUEilpaU6ePCgjhw5wnU7xAzuxgRspntS+bPPPnvZSeVNTU2qr6+PUofA0EXYARbBpHJg4Ag7wCIyMzMlSQcPHuz1ePf+7joAFxF2gEUwqRwYOMIOsAgmlQMDx6RywEKYVA4MDCM7wIKYVA70
D2EHWAiTyoGBYVI5YBFMKgd6YlI5YDNMKgcGjrADLIJJ5cDAEXaARTCpHBg4wg6wCCaVAwNH2AEWwaRyYOCYVA5YCJPKgYFh6gFgQcFgUPX19WpublZmZqYKCwsZ0SEm9TUPGNkBFhQfH6+ioqJotwFYBmEHWFAgENC6det07Ngx3XzzzZo/f74SExOj3RYwZBF2gMUsXrxYq1evVjAYNPctXLhQCxYs0IoVK6LYGTB0EXaAhSxevFgrV67ssT8YDJr7CTygJ6YeABYRCAS0atUqSVJGRoZeeeUVNTc365VXXlFGRoYkadWqVQoEAtFsExiSCDvAIl566SUZhqHU1FSdOHFCOTk52rFjh3JycnTixAmlpKTIMAy99NJL0W4VGHIIO8Ai3nzzTUnS3//932vcuHEqLi7WnDlzVFxcrHHjxqm0tDSsDsBFXLMDLOYXv/iF7rvvPi1atEhJSUnq6OjQtm3b9Prrr0e7NWDIIuwAi5gxY4Z++9vfyuFw6P3339fWrVvNY2PGjJHD4ZBhGJoxY0YUuwSGJk5jAhYxYcIESV0PffZ6vVqyZIkOHz6sJUuWyOv1mg+H7q4DcBEjO8AiWlpazNeBQEDLly/X8uXLr1gHoAsjO8Aizpw5I0m69957e12pfOrUqWF1AC4i7ACLuOGGGyR1jdxGjx4ddmz06NH685//HFYH4CJOYwIW8ZWvfEWStH//frlcLpWXl+umm27Sn/70J73++uvav39/WB2Ai1jiB7CIQCCg4cOHKzExUZ2dnWHPxoyPj5fT6VQgENC5c+d4KDRiBkv8ADbT0NCgCxcu6MKFC8rIyNDcuXPNkd1rr71m3pjS0NDA8j/AJQg7wCJOnTolSRo/frxaW1v14osvmseys7M1fvx47d+/36wDcBE3qAAW0X2X5fz583X06FHt2LFDGzdu1I4dO3TkyBE9+eSTYXUALiLsAIvovsuypqbGnEDeLRQKqba2NqwOwEWDEnanTp3SY489ppEjRyo5OVl33HGHGhsbzeOGYWjZsmXyeDxKSkpSUVGRDh06NBitALbRfZfltm3blJKSEvYg6JSUFG3bti2sDsBFEQ+71tZW3X333Ro2bJi2bdumDz74QC+++KKuv/56s2bFihVavXq1qqurtXfvXrndbk2dOlVtbW2RbgewjcLCQnPU1tnZGXas+31GRoYKCwuveW/AUBfxqQdLly7Vb3/7W9XX1/d63DAMeTwelZWVacmSJZK6/lBdLpeWL1+uJ5544qo/g6kHiEXBYFDJyclXXJw1MTFR7e3tio+Pv4adAdHT1zyI+Mju17/+te6880499NBDysjI0Pjx4/XKK6+Yx5uamuT1elVSUmLuczqdmjx5shoaGnr9zs7OTvn9/rANiDXbt2+/6irkgUBA27dvv0YdAdYR8bD705/+pJdfflm5ubl655139OSTT+p73/uefvGLX0iSvF6vJMnlcoV9zuVymccuVVVVpbS0NHPLysqKdNvAkLd69Wrz9bRp0/Tggw/qb/7mb/Tggw9q2rRpvdYB6BLxeXahUEh33nmnKisrJXXNCTp06JBefvllffvb3zbrHA5H2OcMw+ixr1tFRYUWLFhgvvf7/QQeYs6JEyckSSNGjND27dt7PEFlxIgRam1tNesAXBTxkV1mZqb+8i//Mmzf1772NfMP0O12S1KPUVxLS0uP0V43p9Op1NTUsA2INdddd52krpvAept60NraGlYH4KKIh93dd9+tjz76KGzf4cOHdeONN0rqetKD2+1WXV2deTwQCGjXrl0qKCiIdDuAbRQXF5uv09PTVV5ern/9139VeXm50tPTe60D0CXipzG///3vq6CgQJWVlZo9e7Z+//vfa/369Vq/fr2krtOXZWVlqqysVG5urnJzc1VZWank5GTNmTMn0u0AtnH27Fnz9aeffhr2uLDL1QHoEvGw+/rXv64tW7aooqJCzz//vLKzs7V27Vo9+uijZs3ixYvV0dGh
+fPnq7W1VZMmTdL27duVkpIS6XYA27jcDVwDrQNiyaA8CHr69OmaPn36ZY87HA4tW7ZMy5YtG4wfD9jS8OHDI1oHxBKejQlYxB133GG+vvTO5bi4uF7rAHRhiR/AIjIyMszXo0aN6rGeXfdqB1+sA9CFsAMsYu/evebrM2fOXHby+N69ezVv3rxr1RZgCZzGBCyi+zG2N954Y9hpS6lrUnn39J4IP+4WsAVGdoBF5ObmSpKOHz+ujIwMFRUVKTk5We3t7dq5c6eOHz8eVgfgooivenAtsOoBYlFHR4eSk5MVFxcnh8PR43FhhmEoFAqpvb1dSUlJUewUuHaituoBgMHxu9/9TlLXo8Hi4uL0yCOPaPXq1XrkkUcUFxdnPkKsuw7ARZzGBCzi1KlTkroeuXfixAlt2rRJmzZtkiQlJCQoOztbTU1NZh2AixjZARbRPbXg2WefVXt7u9asWaOnn35aa9as0blz57R06dKwOgAXMbIDLOKGG26QJNXU1Oi73/2uysrKzGOhUEi1tbVhdQAuIuwAi/jKV74iSXr77bf1wAMP6G//9m+VlJSkjo4Ovf3223r77bfD6gBcxN2YgEUEg0Hl5OQoPj5ex48f14ULF8xjCQkJuvHGGxUKhXTkyBHFx8dHsVPg2uFuTMBm4uPj9dBDD+nYsWO9rmd37NgxzZo1i6ADesHIDrCI7pHdqFGjdObMGXMSuSSNHTtWo0aN0qeffsrIDjGlr3nANTvAIurr6/Xxxx9r06ZNmjBhgtatW6djx47p5ptv1vz589XY2KiCggLV19erqKgo2u0CQwphB1hEc3OzJOnYsWN6+OGHw0Z2a9eu1QsvvBBWB+Aiwg6wiMzMTEnSY4891uNxYC0tLXrsscfC6gBcRNgBFlFQUGA+Fqy4uFi5ubnq6OhQUlKSjhw5orfeektxcXEqKCiIdqvAkEPYARZRX19vPv/yrbfe6rUmFAqpvr5eU6ZMuZatAUMeUw8Ai9i5c2dE64BYwsgOsIgvTiKfNm2avvrVr5qnMQ8fPqxt27b1qAPQhbADLOKzzz6TJCUmJuqDDz4ww03qWr08MTFRgUDArANwEWEHWET3lIJAIKATJ06EHTtx4oS6nw/B1AOgJ67ZARZx3XXXma8vffDRF99/sQ5AF8IOsIj8/Hzz9aXz7L74/ot1ALoQdoBFfPFa3Oeffx527IvvuWYH9ETYARbxySefmK+vdBrzi3UAuhB2gEWMHj06onVALOFuTMAi0tPTzdcZGRkqKipScnKy2tvbtXPnTrW0tPSoA9CFsAMs4n//93/N12fOnNGvfvUr873D4ei1DkAXTmMCFsE1O2DgCDvAIrhmBwwcpzEBixg5cqT5OjExUbfeeqt5ze7QoUMKBAI96gB0IewAizhz5oz5OhAIaP/+/VetA9CF05iARezbty+idUAsIewAi/iLv/iLiNYBsYSwAyzii9MLIlEHxBLCDrAIl8sV0ToglhB2gEX0dZ061rMDeiLsAIv4wx/+ENE6IJYMethVVVXJ4XCorKzM3GcYhpYtWyaPx6OkpCQVFRXp0KFDg90KYGndz76MVB0QSwY17Pbu3av169frtttuC9u/YsUKrV69WtXV1dq7d6/cbremTp2qtra2wWwHABCjBi3szp49q0cffVSvvPKKRowYYe43DENr167VD37wA82cOVN5eXl69dVX1d7ero0bNw5WO4DlDRs2LKJ1QCwZtLB76qmndN999+mee+4J29/U1CSv16uSkhJzn9Pp1OTJk9XQ0NDrd3V2dsrv94dtQKy5cOFCROuAWDIojwvbvHmzGhsb9d577/U45vV6JfW8Pdrlcun48eO9fl9VVZWee+65yDcKWMilKx182ToglkR8ZHfy5En94z/+o/793//9ik9yuHTiq2EYl50MW1FRIZ/PZ24nT56MaM+AFSQk9O3/pn2tA2JJxP8qGhsb1dLSookTJ5r7gsGgdu/ererqan300UeSukZ4
mZmZZk1LS8tlJ8M6nU45nc5ItwpYisvl0tmzZ/tUByBcxEd2U6ZM0R/+8AcdOHDA3O688049+uijOnDggG666Sa53W7V1dWZnwkEAtq1a5cKCgoi3Q5gG7fccktE64BYEvGRXUpKivLy8sL2DR8+XCNHjjT3l5WVqbKyUrm5ucrNzVVlZaWSk5M1Z86cSLcD2EZ7e3tE64BYEpWT+4sXL1ZHR4fmz5+v1tZWTZo0Sdu3b1dKSko02gEs4dSpUxGtA2KJw7DgrVt+v19paWny+XxKTU2NdjvANTFhwoTLLtj6RePHj2dNO8SMvuYBz8YELOLmm2+OaB0QSwg7wCKOHj0a0ToglhB2gEXwIGhg4Ag7wCL6MseuP3VALCHsAIvo7OyMaB0QSwg7wCJ4NiYwcIQdYBFXetbsQOqAWELYARaRnZ0d0ToglhB2gEV89atfjWgdEEsIO8Aizpw5E9E6IJYQdoBF/PGPf4xoHRBLCDvAIlj1ABg4wg6wiL4uYMxCx0BPhB1gEcFgMKJ1QCwh7ACL4AkqwMARdoBFxMX17c+1r3VALOGvArCI66+/PqJ1QCwh7ACLcDgcEa0DYglhB1gEUw+AgSPsAItobW2NaB0QSwg7wCJCoVBE64BYQtgBFpGYmBjROiCWEHaARWRkZES0DoglhB1gEV6vN6J1QCwh7ACL4HFhwMARdoBFMM8OGDjCDrAIwzAiWgfEEsIOsAjCDhg4wg4AYHuEHQDA9gg7AIDtEXYAANsj7AAAtkfYAQBsj7ADANgeYQcAsD3CDgBge4QdAMD2CDsAgO0RdgAA2yPsAAC2F/Gwq6qq0te//nWlpKQoIyNDpaWl+uijj8JqDMPQsmXL5PF4lJSUpKKiIh06dCjSrQAAIGkQwm7Xrl166qmn9N///d+qq6vThQsXVFJSonPnzpk1K1as0OrVq1VdXa29e/fK7XZr6tSpamtri3Q7AADIYQzy4ldnzpxRRkaGdu3apb/+67+WYRjyeDwqKyvTkiVLJEmdnZ1yuVxavny5nnjiiat+p9/vV1pamnw+n1JTUwezfWDI6M8K5Kxph1jR1zwY9Gt2Pp9PkpSeni5JampqktfrVUlJiVnjdDo1efJkNTQ09PodnZ2d8vv9YRsAAH01qGFnGIYWLFigb37zm8rLy5Mkeb1eSZLL5Qqrdblc5rFLVVVVKS0tzdyysrIGs20AgM0Matg9/fTTev/997Vp06Yexy49JWMYxmVP01RUVMjn85nbyZMnB6VfAIA9JQzWFz/zzDP69a9/rd27d2v06NHmfrfbLalrhJeZmWnub2lp6THa6+Z0OuV0OgerVQCAzUV8ZGcYhp5++mnV1NTov/7rv5SdnR12PDs7W263W3V1dea+QCCgXbt2qaCgINLtAAAQ+ZHdU089pY0bN+rNN99USkqKeR0uLS1NSUlJcjgcKisrU2VlpXJzc5Wbm6vKykolJydrzpw5kW4HAIDIh93LL78sSSoqKgrb/2//9m/6h3/4B0nS4sWL1dHRofnz56u1tVWTJk3S9u3blZKSEul2AAAY/Hl2g4F5dohFzLMDehoy8+wAAIg2wg4AYHuEHQDA9gg7AIDtEXYAANsj7AAAtkfYAQBsj7ADANgeYQcAsD3CDgBge4QdAMD2CDsAgO0RdgAA2yPsAAC2R9gBAGyPsAMA2B5hBwCwPcIOAGB7hB0AwPYIOwCA7RF2AADbI+wAALZH2AEAbI+wAwDYHmEHALA9wg4AYHuEHQDA9gg7AIDtEXYAANsj7AAAtkfYAQBsj7ADANgeYQcAsD3CDgBge4QdAMD2CDsAgO0RdgAA2yPsAAC2lxDtBoBY197erg8//DCi37lv374+1Y0bN07JyckR/dnAUETYAVH24YcfauLEiRH9zr5+X2NjoyZMmBDRnw0MRQ7DMIxo/fB169Zp5cqVam5u1q233qq1a9eqsLDwqp/z+/1KS0uTz+dTamrqNegUuLKdO3dq5cqVutKf
k8Ph6HV/UlKS4uPjr/ozfvWrX/W5n9mzZ1/x+Llz5/TZZ5/puuuu6/Gzr/ZPwkMPPaTvfOc7fe4FGEx9zYOohd0vf/lLzZ07V+vWrdPdd9+tn/70p/rZz36mDz74QGPGjLniZwk7DDWXC7K+SEqQxo2KzuXzD/8cUseF/n8uiv9HBsIM+bCbNGmSJkyYoJdfftnc97WvfU2lpaWqqqq64mcJOww1XybsxrvjtO+J6yLYTd9N+OlZ7feG+vWZm266SceOHRukjoD+6WseROWaXSAQUGNjo5YuXRq2v6SkRA0NDT3qOzs71dnZab73+/2D3iPQH1/m/4z7f9+gCQ9c/fT9YPj5m/Ua/42CqPxs4FqKStj9+c9/VjAYlMvlCtvvcrnk9Xp71FdVVem55567Vu0B19QteXfoZ/9v71Xr+nMTS2NjY99+9rhxff5OwMqiejfmpad+DMPo9XRQRUWFFixYYL73+/3Kysoa9P6AayE5OXlAd0R+cTR56d8Nd1gC4aISdqNGjVJ8fHyPUVxLS0uP0Z4kOZ1OOZ3Oa9UeAMBmonILWGJioiZOnKi6urqw/XV1dSoo4PoB0BcOh8PcAFxZ1E5jLliwQHPnztWdd96pv/qrv9L69et14sQJPfnkk9FqCQBgU1ELu29961v69NNP9fzzz6u5uVl5eXl66623dOONN0arJQCATUX1CSoDxTw7xKq+nLK04J80MGB9zQNWPQAs5GpBRtABvSPsAIu5XKARdMDlseoBYEEEG9A/jOwAALZH2AEAbI+wAwDYHmEHALA9wg4AYHuEHQDA9gg7AIDtEXYAANsj7AAAtmfJJ6h0Pz3C7/dHuRMAQDR158DVnipkybBra2uTJGVlZUW5EwDAUNDW1qa0tLTLHrfkEj+hUEinT59WSkoKqzQjZvn9fmVlZenkyZMsdYWYZRiG2tra5PF4FBd3+Stzlgw7AKzrCPQHN6gAAGyPsAMA2B5hB1iU0+nUv/zLv8jpdEa7FWDI45odAMD2GNkBAGyPsAMA2B5hBwCwPcIOAGB7hB0AwPYIO8Bidu/erRkzZsjj8cjhcKi2tjbaLQFDHmEHWMy5c+d0++23q7q6OtqtAJZhyVUPgFg2bdo0TZs2LdptAJbCyA4AYHuEHQDA9gg7AIDtEXYAANsj7AAAtsfdmIDFnD17VkePHjXfNzU16cCBA0pPT9eYMWOi2BkwdLHED2AxO3fuVHFxcY/98+bN04YNG659Q4AFEHYAANvjmh0AwPYIOwCA7RF2AADbI+wAALZH2AEAbI+wAwDYHmEHALA9wg4AYHuEHQDA9gg7AIDtEXYAANv7/xuri+IiHxpuAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 500x500 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Notched box plot of the login_diff_time column. plt.boxplot returns a dict\n",
    "# that maps component names (e.g. \"fliers\") to the artists it drew.\n",
    "plt.figure(figsize=(5,5),dpi=100)\n",
    "p = plt.boxplot(user1['login_diff_time'].values, notch=True)\n",
    "\n",
    "# Key step for extracting the outliers: the y-data of the first flier artist\n",
    "# are the values the box plot drew as outlier points (only one box is plotted here).\n",
    "outlier = p[\"fliers\"][0].get_ydata()\n",
    "\n",
    "print(f\"一共有{len(outlier)}个异常数据\")\n",
    "# max()/min() raise ValueError on an empty array, so the extremes are only\n",
    "# reported when the plot actually produced at least one outlier.\n",
    "if len(outlier) > 0:\n",
    "    print(f\"异常值的最大值为{outlier.max()}\")\n",
    "    print(f\"异常值的最小值为{outlier.min()}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "id": "d2f9cb17",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>login_day</th>\n",
       "      <th>login_diff_time</th>\n",
       "      <th>distance_day</th>\n",
       "      <th>login_time</th>\n",
       "      <th>launch_time</th>\n",
       "      <th>chinese_subscribe_num</th>\n",
       "      <th>math_subscribe_num</th>\n",
       "      <th>add_friend</th>\n",
       "      <th>add_group</th>\n",
       "      <th>camp_num</th>\n",
       "      <th>learn_num</th>\n",
       "      <th>finish_num</th>\n",
       "      <th>study_num</th>\n",
       "      <th>coupon</th>\n",
       "      <th>course_order_num</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2000001555945280</td>\n",
       "      <td>7</td>\n",
       "      <td>6.86</td>\n",
       "      <td>131</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2000001556645228</td>\n",
       "      <td>4</td>\n",
       "      <td>1.00</td>\n",
       "      <td>81</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2000001558047804</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>179</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2000001558146467</td>\n",
       "      <td>6</td>\n",
       "      <td>1.00</td>\n",
       "      <td>32</td>\n",
       "      <td>24</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2000001558146878</td>\n",
       "      <td>4</td>\n",
       "      <td>1.75</td>\n",
       "      <td>361</td>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135612</th>\n",
       "      <td>2000002947317726</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135613</th>\n",
       "      <td>2000002947317758</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135614</th>\n",
       "      <td>2000002947317827</td>\n",
       "      <td>4</td>\n",
       "      <td>1.00</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135615</th>\n",
       "      <td>2000002947317941</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>393</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135616</th>\n",
       "      <td>2000002948014779</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>135617 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 user_id  login_day  login_diff_time  distance_day  \\\n",
       "0       2000001555945280          7             6.86           131   \n",
       "1       2000001556645228          4             1.00            81   \n",
       "2       2000001558047804          1             0.00           179   \n",
       "3       2000001558146467          6             1.00            32   \n",
       "4       2000001558146878          4             1.75           361   \n",
       "...                  ...        ...              ...           ...   \n",
       "135612  2000002947317726          1             0.00             0   \n",
       "135613  2000002947317758          1             0.00             0   \n",
       "135614  2000002947317827          4             1.00            -1   \n",
       "135615  2000002947317941          1             0.00             0   \n",
       "135616  2000002948014779          1             0.00             0   \n",
       "\n",
       "        login_time  launch_time  chinese_subscribe_num  math_subscribe_num  \\\n",
       "0                1            1                      1                   0   \n",
       "1                3            1                      1                   1   \n",
       "2                3            0                      1                   0   \n",
       "3               24            3                      0                   0   \n",
       "4               39            0                      0                   1   \n",
       "...            ...          ...                    ...                 ...   \n",
       "135612           2            0                      0                   0   \n",
       "135613           2            0                      0                   0   \n",
       "135614           0            0                      0                   0   \n",
       "135615         393            0                      0                   0   \n",
       "135616           4            0                      0                   0   \n",
       "\n",
       "        add_friend  add_group  camp_num  learn_num  finish_num  study_num  \\\n",
       "0                1          1         0          0           0          0   \n",
       "1                1          1         2          1           0          0   \n",
       "2                1          1         2          0           0          0   \n",
       "3                1          1         1          5           5          0   \n",
       "4                1          1         2          0           0          1   \n",
       "...            ...        ...       ...        ...         ...        ...   \n",
       "135612           1          1         1          0           0          0   \n",
       "135613           1          1         1          0           0          0   \n",
       "135614           1          1         1          0           0          0   \n",
       "135615           1          1         1          0           0          0   \n",
       "135616           1          1         2          0           0          0   \n",
       "\n",
       "        coupon  course_order_num  \n",
       "0            0                 4  \n",
       "1            0                 0  \n",
       "2            0                 0  \n",
       "3            0                 1  \n",
       "4            0                 0  \n",
       "...        ...               ...  \n",
       "135612       0                 0  \n",
       "135613       0                 0  \n",
       "135614       0                 0  \n",
       "135615       0                 0  \n",
       "135616       0                 0  \n",
       "\n",
       "[135617 rows x 16 columns]"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "# Median of the login_diff_time column, taken over the whole column (the\n",
    "# negative values replaced below are still included in the calculation).\n",
    "# Series.median() skips NaN by default, whereas np.median() returns NaN as soon\n",
    "# as the column holds a single NaN, which would overwrite every negative value\n",
    "# below with NaN.\n",
    "# NOTE: the name `median_age` is kept unchanged in case later cells reuse it,\n",
    "# even though the value is the median of login_diff_time.\n",
    "median_age = user1['login_diff_time'].median()\n",
    "\n",
    "# Replace the negative (abnormal) values in user1['login_diff_time'] with the\n",
    "# median; this modifies user1 in place.\n",
    "user1.loc[user1['login_diff_time'] < 0, 'login_diff_time'] = median_age\n",
    "\n",
    "\n",
    "user1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "id": "ea2fe5c9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "一共有260个异常数据\n",
      "异常值的最大值为6588\n",
      "异常值的最小值为-1275\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdAAAAGsCAYAAABtvfa6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAz50lEQVR4nO3df3BV9Z3/8VcSkmvA5BSIyU0KSmoySpvIKnYI0UiyCkKNmI2praEZ/3BFVsFSQdswOyN2NGEVdN1lEHD3W9vxR3YnhqxNbQqdBRrM5Vck0wSKRRuUHwlBNrkJmB/k5vP9Q3LwkojJkeQG7vMxc6bmnPc953060778nB+fE2KMMQIAAEMSGugGAAC4HBGgAAA4QIACAOAAAQoAgAMEKAAADhCgAAA4QIACAODAmEA3MFr09vbq+PHjioqKUkhISKDbAQAEgDFG7e3tSkhIUGjoxceYBOg5x48f1+TJkwPdBgBgFDhy5IgmTZp00RoC9JyoqChJX/yXFh0dHeBuAACB0NbWpsmTJ9uZcDEE6Dl9l22jo6MJUAAIcoO5lcdDRAAAOECAAgDgAAEKAIADBCgAAA4QoAAAOECAAgDgAAEKAIADBCgAAA4QoAAAOECAAgDgAFP5AZAk+Xw+VVVVqbGxUfHx8crIyFBYWFig2wJGLUagAFRWVqakpCRlZWUpPz9fWVlZSkpKUllZWaBbA0YtAhQIcmVlZcrLy1Nqaqo8Ho/a29vl8XiUmpqqvLw8QhT4CiHGGBPoJkaDtrY2WZYlr9fL11gQNHw+n5KSkpSamqry8nK/Dwj39vYqJydH9fX1OnToEJdzERSGkgWMQIEgVlVVpcOHD2vFihUyxmjbtm16++23tW3bNhljVFhYqIaGBlVVVQW6VWDU4SEiIIg1NjZKkj7++GM9+OCDOnz4sL1typQpeu655/zqAJzHCBQIYvHx8ZKkgoKCAe+BFhQU+NUBOI97oOdwDxTBqLu7W+PGjdPEiRN19OhRjRlz/qJUT0+PJk2apFOnTunMmTOKiIgIYKfAyOAeKIBBqa6uVk9Pj06cOKHc3Fy/EWhubq5OnDihnp4eVVdXB7pVYNQhQIEg1ndv84033lBdXZ3S09MVHR2t9PR01dfX64033vCrA3AeDxEBQazv3ub111+vjz76qN9MRLt37/arA3Ae90DP4R4oghHvgQL+uAcKYFDCwsK0Zs0aVVRUKCcnx+8eaE5OjioqKrR69WrCExgAl3CBIJebm6vS0lItW7ZM6enp9vrExESVlpYqNzc3gN0BoxeXcM/hEi6CHV9jAYaWBYxAAUj64nJuZmZmoNsALhvcAwUAwAECFAAABwhQAAAcIEABAHCAAAUAwAECFAAABwhQAAAcIEABAHCAAAUAwAECFAAABwhQAAAcGPYAPXbsmH7yk59o4sSJGjt2rP7u7/5ONTU19nZjjFauXKmEhARFRkYqMzNT+/fv99tHV1eXlixZopiYGI0bN07z58/X0aNH/WpaWlpUUFAgy7JkWZYKCgrU2to63KcHAAhSwxqgLS0tuu222xQeHq7f//73OnDggNasWaNvfetbds0LL7ygl156SWvXrtWePXvkdrs1e/Zstbe32zVLly7Vpk2bVFJSoh07duj06dPKzs6Wz+eza/Lz81VbW6vKykpVVlaqtrZWBQUFw3l6AIBgZobRz3/+c3P77bd/5fbe3l7jdrvNqlWr7HWdnZ3Gsiyzfv16Y4wxra2tJjw83JSUlNg1x44dM6GhoaaystIYY8yBAweMJLNz5067xuPxGEnm4MGDg+rV6/UaScbr9Q7pHAEAV46hZMGwjkDfffdd3XrrrfrhD3+o2NhY3XzzzXrttdfs7Q0NDWpqatKcOXPsdS6XS7NmzVJ1dbUkqaamRmfPnvWrSUhIUEpKil3j8XhkWZZmzJhh16SlpcmyLLvmQl1dXWpra/NbAAAYrGEN0L/97W969dVXlZyc
rD/84Q9atGiRnnjiCf3mN7+RJDU1NUmS4uLi/H4XFxdnb2tqalJERITGjx9/0ZrY2Nh+x4+NjbVrLlRcXGzfL7UsS5MnT/5mJwsACCrDGqC9vb265ZZbVFRUpJtvvlmPPvqoHnnkEb366qt+dSEhIX5/G2P6rbvQhTUD1V9sP4WFhfJ6vfZy5MiRwZ4WAADDG6Dx8fH67ne/67du6tSp+vTTTyVJbrdbkvqNEpubm+1RqdvtVnd3t1paWi5ac+LEiX7HP3nyZL/RbR+Xy6Xo6Gi/BQCAwRrWAL3tttv04Ycf+q3761//quuuu06SlJiYKLfbrS1bttjbu7u7tX37dqWnp0uSpk+frvDwcL+axsZG1dfX2zUzZ86U1+vV7t277Zpdu3bJ6/XaNQAAXFLD+TTT7t27zZgxY8zzzz9vDh06ZN58800zduxY88Ybb9g1q1atMpZlmbKyMlNXV2cefPBBEx8fb9ra2uyaRYsWmUmTJpk//vGP5oMPPjB///d/b6ZNm2Z6enrsmrlz55qbbrrJeDwe4/F4TGpqqsnOzh50rzyFCwAYShYMa4AaY8xvf/tbk5KSYlwul7nxxhvNxo0b/bb39vaaZ555xrjdbuNyucwdd9xh6urq/Go6OjrM4sWLzYQJE0xkZKTJzs42n376qV/NqVOnzIIFC0xUVJSJiooyCxYsMC0tLYPukwAFAAwlC0KMMSawY+DRoa2tTZZlyev1cj8UAILUULKAuXABAHCAAAUAwAECFAAABwhQAAAcIEABAHCAAAUAwAECFAAABwhQAAAcIEABAHCAAAUAwAECFAAABwhQAAAcIEABAHCAAAUAwAECFAAABwhQAAAcIEABAHCAAAUAwAECFAAABwhQAAAcIEABAHCAAAUAwAECFAAABwhQAAAcIEABAHCAAAUAwAECFAAABwhQAAAcIEABAHCAAAUAwAECFAAABwhQAAAcIEABAHCAAAUAwIFhDdCVK1cqJCTEb3G73fZ2Y4xWrlyphIQERUZGKjMzU/v37/fbR1dXl5YsWaKYmBiNGzdO8+fP19GjR/1qWlpaVFBQIMuyZFmWCgoK1NraOpynBgAIcsM+Av3e976nxsZGe6mrq7O3vfDCC3rppZe0du1a7dmzR263W7Nnz1Z7e7tds3TpUm3atEklJSXasWOHTp8+rezsbPl8PrsmPz9ftbW1qqysVGVlpWpra1VQUDDcpwYACGZmGD3zzDNm2rRpA27r7e01brfbrFq1yl7X2dlpLMsy69evN8YY09raasLDw01JSYldc+zYMRMaGmoqKyuNMcYcOHDASDI7d+60azwej5FkDh48+JW9dXZ2Gq/Xay9HjhwxkozX6/0mpwwAuIx5vd5BZ8Gwj0APHTqkhIQEJSYm6sc//rH+9re/SZIaGhrU1NSkOXPm2LUul0uzZs1SdXW1JKmmpkZnz571q0lISFBKSopd4/F4ZFmWZsyYYdekpaXJsiy7ZiDFxcX2JV/LsjR58uRLet4AgCvbsAbojBkz9Jvf/EZ/+MMf9Nprr6mpqUnp6ek6deqUmpqaJElxcXF+v4mLi7O3NTU1KSIiQuPHj79oTWxsbL9jx8bG2jUDKSwslNfrtZcjR458o3MFAASXMcO583nz5tn/nJqaqpkzZ+r666/Xr3/9a6WlpUmSQkJC/H5jjOm37kIX1gxU/3X7cblccrlcgzoPAAAuNKKvsYwbN06pqak6dOiQ/TTuhaPE5uZme1TqdrvV3d2tlpaWi9acOHGi37FOnjzZb3QLAMClMqIB2tXVpb/85S+Kj49XYmKi3G63tmzZYm/v7u7W9u3blZ6eLkmaPn26wsPD/WoaGxtVX19v18ycOVNer1e7d++2a3bt2iWv12vXAABwqQ3rJdzly5fr3nvv1bXXXqvm5mY999xzamtr00MPPaSQkBAtXbpURUVFSk5OVnJysoqKijR27Fjl5+dLkizL0sMPP6xly5Zp4sSJmjBhgpYvX67U
1FTdddddkqSpU6dq7ty5euSRR7RhwwZJ0sKFC5Wdna0bbrhhOE8PABDEhjVAjx49qgcffFCfffaZrrnmGqWlpWnnzp267rrrJElPP/20Ojo69Nhjj6mlpUUzZszQ5s2bFRUVZe/j5Zdf1pgxY/TAAw+oo6NDd955p15//XWFhYXZNW+++aaeeOIJ+2nd+fPna+3atcN5agCAIBdijDGBbmI0aGtrk2VZ8nq9io6ODnQ7AIAAGEoWMBcuAAAOEKAAADhAgAIA4AABCgCAAwQoAAAOEKAAADhAgAIA4AABCgCAAwQoAAAOEKAAADhAgAIA4AABCgCAAwQoAAAOEKAAADgwrN8DBXD58Pl8qqqqUmNjo+Lj45WRkeH33V0A/hiBAlBZWZmSkpKUlZWl/Px8ZWVlKSkpSWVlZYFuDRi1CFAgyJWVlSkvL0+pqanyeDxqb2+Xx+NRamqq8vLyCFHgK4QYY0ygmxgNhvIVcuBK4fP5lJSUpNTUVJWXlys09Py/U/f29ionJ0f19fU6dOgQl3MRFIaSBYxAgSBWVVWlw4cPa8WKFX7hKUmhoaEqLCxUQ0ODqqqqAtQhMHoRoEAQa2xslCSlpKQMuL1vfV8dgPMIUCCIxcfHS5Lq6+sH3N63vq8OwHkEKBDEMjIyNGXKFBUVFam3t9dvW29vr4qLi5WYmKiMjIwAdQiMXgQoEMTCwsK0Zs0aVVRUKCcnx+8p3JycHFVUVGj16tU8QAQMgIkUgCCXm5ur0tJSLVu2TOnp6fb6xMRElZaWKjc3N4DdAaMXr7Gcw2ssCHbMRAQMLQsYgQKQ9MXl3MzMzEC3AVw2uAcKAIADBCgAAA4QoAAAOECAAgDgAAEKAIADBCgAAA4QoAAAODBiAVpcXKyQkBAtXbrUXmeM0cqVK5WQkKDIyEhlZmZq//79fr/r6urSkiVLFBMTo3Hjxmn+/Pk6evSoX01LS4sKCgpkWZYsy1JBQYFaW1tH4KwAAMFqRAJ0z5492rhxo2666Sa/9S+88IJeeuklrV27Vnv27JHb7dbs2bPV3t5u1yxdulSbNm1SSUmJduzYodOnTys7O1s+n8+uyc/PV21trSorK1VZWana2loVFBSMxKkBAIKVGWbt7e0mOTnZbNmyxcyaNcv89Kc/NcYY09vba9xut1m1apVd29nZaSzLMuvXrzfGGNPa2mrCw8NNSUmJXXPs2DETGhpqKisrjTHGHDhwwEgyO3futGs8Ho+RZA4ePDjoPr1er5FkvF7vNzldAMBlbChZMOwj0Mcff1z33HOP7rrrLr/1DQ0Nampq0pw5c+x1LpdLs2bNUnV1tSSppqZGZ8+e9atJSEhQSkqKXePxeGRZlmbMmGHXpKWlybIsu2YgXV1damtr81sAABisYZ0Lt6SkRDU1Ndq7d2+/bU1NTZKkuLg4v/VxcXH65JNP7JqIiAiNHz++X03f75uamhQbG9tv/7GxsXbNQIqLi/Xss88O7YQAADhn2EagR44c0U9/+lO9+eabuuqqq76yLiQkxO9vY0y/dRe6sGag+q/bT2Fhobxer70cOXLkoscEAODLhi1Aa2pq1NzcrOnTp2vMmDEaM2aMtm/frn/7t3/TmDFj7JHnhaPE5uZme5vb7VZ3d7daWlouWnPixIl+xz958mS/0e2XuVwuRUdH+y0AAAzWsAXonXfeqbq6OtXW1trLrbfeqgULFqi2tlbf+c535Ha7tWXLFvs33d3d2r59u/1R3+nTpys8PNyvprGxUfX19XbNzJkz5fV6tXv3brtm165d8nq9fh8HBgDgUhq2e6BRUVFKSUnxWzdu3DhNnDjRXr906VIVFRUpOTlZycnJKioq0tixY5Wfny9JsixLDz/8sJYtW6aJEydqwoQJWr58uVJTU+2HkqZOnaq5c+fqkUce0YYNGyRJCxcuVHZ2tm644YbhOj0AQJAL6Ae1n376aXV0dOixxx5TS0uLZsyYoc2bNysqKsquefnllzVmzBg9
8MAD6ujo0J133qnXX39dYWFhds2bb76pJ554wn5ad/78+Vq7du2Inw8AIHiEGGNMoJsYDdra2mRZlrxeL/dDASBIDSULmAsXAAAHCFAAABwgQAEAcIAABQDAAQIUAAAHCFAAABwgQAEAcIAABQDAAQIUAAAHCFAAABwgQAEAcIAABQDAAQIUAAAHCFAAABwgQAEAcIAABQDAAQIUAAAHCFAAABwgQAEAcIAABQDAAQIUAAAHCFAAABwgQAEAcIAABQDAAQIUAAAHCFAAABwgQAEAcIAABQDAAQIUAAAHCFAAABwgQAEAcIAABQDAgTGBbgDA6ODz+VRVVaXGxkbFx8crIyNDYWFhgW4LGLUYgQJQWVmZkpKSlJWVpfz8fGVlZSkpKUllZWWBbg0YtYY1QF999VXddNNNio6OVnR0tGbOnKnf//739nZjjFauXKmEhARFRkYqMzNT+/fv99tHV1eXlixZopiYGI0bN07z58/X0aNH/WpaWlpUUFAgy7JkWZYKCgrU2to6nKcGXDHKysqUl5en1NRUeTwetbe3y+PxKDU1VXl5eYQo8FXMMHr33XfN7373O/Phhx+aDz/80KxYscKEh4eb+vp6Y4wxq1atMlFRUeadd94xdXV15kc/+pGJj483bW1t9j4WLVpkvv3tb5stW7aYDz74wGRlZZlp06aZnp4eu2bu3LkmJSXFVFdXm+rqapOSkmKys7OH1KvX6zWSjNfrvTQnD1wGenp6zJQpU8y9995rfD6f3zafz2fuvfdek5iY6Pe/N+BKNpQsGNYAHcj48ePNf/zHf5je3l7jdrvNqlWr7G2dnZ3Gsiyzfv16Y4wxra2tJjw83JSUlNg1x44dM6GhoaaystIYY8yBAweMJLNz5067xuPxGEnm4MGDX9lHZ2en8Xq99nLkyBECFEFn69atRpLxeDwDbq+urjaSzNatW0e2MSBAhhKgI3YP1OfzqaSkRGfOnNHMmTPV0NCgpqYmzZkzx65xuVyaNWuWqqurJUk1NTU6e/asX01CQoJSUlLsGo/HI8uyNGPGDLsmLS1NlmXZNQMpLi62L/lalqXJkydf6lMGRr3GxkZJUkpKyoDb+9b31QE4b9gDtK6uTldffbVcLpcWLVqkTZs26bvf/a6ampokSXFxcX71cXFx9rampiZFRERo/PjxF62JjY3td9zY2Fi7ZiCFhYXyer32cuTIkW90nsDlKD4+XpJUX18/4Pa+9X11AM4b9tdYbrjhBtXW1qq1tVXvvPOOHnroIW3fvt3eHhIS4ldvjOm37kIX1gxU/3X7cblccrlcgz0N4IqUkZGhKVOmqKioSOXl5QoNPf/v1L29vSouLlZiYqIyMjIC2CUwOg37CDQiIkJJSUm69dZbVVxcrGnTpumVV16R2+2WpH6jxObmZntU6na71d3drZaWlovWnDhxot9xT5482W90C8BfWFiY1qxZo4qKCuXk5Pg9hZuTk6OKigqtXr2a90GBAYz4e6DGGHV1dSkxMVFut1tbtmyxt3V3d2v79u1KT0+XJE2fPl3h4eF+NY2Njaqvr7drZs6cKa/Xq927d9s1u3btktfrtWsAfLXc3FyVlpaqrq5O6enpio6OVnp6uurr61VaWqrc3NxAtwiMSsN6CXfFihWaN2+eJk+erPb2dpWUlGjbtm2qrKxUSEiIli5dqqKiIiUnJys5OVlFRUUaO3as8vPzJUmWZenhhx/WsmXLNHHiRE2YMEHLly9Xamqq7rrrLknS1KlTNXfuXD3yyCPasGGDJGnhwoXKzs7WDTfcMJynB1wxcnNzdd999zETETAEwxqgJ06cUEFBgRobG2VZlm666SZVVlZq9uzZkqSnn35aHR0deuyxx9TS0qIZM2Zo8+bNioqKsvfx8ssva8yYMXrggQfU0dGhO++8U6+//rrf/7DffPNNPfHEE/bTuvPnz9fatWuH89SAK05YWJgyMzMD3QZw2QgxxphANzEatLW1ybIseb1e
RUdHB7odAEAADCULmAsXAAAHCFAAABzgc2YAJPE5M2CoGIEC4HNmgAMEKBDk+JwZ4AxP4Z7DU7gIRj6fT0lJSUpNTR1wKr+cnBzV19fr0KFDXM5FUOApXACDUlVVpcOHD2vFihV+4SlJoaGhKiwsVENDg6qqqgLUITB6EaBAEONzZoBzBCgQxPicGeAc90DP4R4ogtGX74G+8847ev/99+3XWG677Tbdf//93ANFUBlKFvAeKBDE+j5ndv/998uyLHV0dNjbIiMj1dHRoXfeeYfwBAbAJVwAkqTOzs6L/g3AH5dwz+ESLoKRz+dTQkKCmpubdc899+gHP/iBPfJ877339Lvf/U6xsbE6fvw4o1AEBS7hAhiUbdu2qbm5Wbfffrveffddv1dZFi1apDvuuEPvv/++tm3bpjvvvDOAnQKjD5dwgSC2bds2SdKzzz474HugK1eu9KsDcB4jUACSBp5MHsBXI0CBIJaZmannnntOixcv1ueff65PPvnE3nbdddcpMjLSrgPgj0u4QBDLzMxUdHS0/vKXv6izs1MbN27U8ePHtXHjRnV2durgwYOKjo4mQIEBMAIFgtxVV12ltrY2tbW1aeHChfb6sWPH2tsB9McIFAhiVVVVam5uVnFxsWJjY/22xcbGqqioSM3NzUwmDwyAAAWCWN8k8YsXL9bHH3+srVu36q233tLWrVv10UcfafHixX51AM7jEi4QxL48mXxaWlq/e51MJg98NWYiOoeZiBCMmEwe8MdMRAAGpW8y+by8vAEnk+/s7FRpaSnhCQyAe6AANNCFqJCQkAHXA/gCl3DP4RIugtGXL+H+93//t9avX6+PP/5Y119/vRYtWqQHHniAS7gIKlzCBTAoVVVVOnz4sB599FFNnTpVhw8ftre98sorWrhwoX7729+qqqqKyRSAC3AJFwhifa+nFBYWKjU1VR6PR+3t7fJ4PEpNTdWKFSv86gCcxwgUCGJ9kyfcfvvtKi8vt7/IkpaWpvLycvtzZhdOsgCAESiAiwgJCQl0C8CoRYACQay5uVmS9P777ysnJ8fvEm5OTo7ef/99vzoA5xGgQBDrm2GoqKhIdXV1Sk9PV3R0tNLT01VfX6/nn3/erw7AedwDBYJYRkaGpkyZourqav31r38dcCaixMREPq4NDGBYR6DFxcX6/ve/r6ioKMXGxionJ0cffvihX40xRitXrlRCQoIiIyOVmZmp/fv3+9V0dXVpyZIliomJ0bhx4zR//nwdPXrUr6alpUUFBQWyLEuWZamgoECtra3DeXrAZa9vJqKKigrdf//9crlcys7Olsvl0v3336+KigqtXr2ad0CBAQxrgG7fvl2PP/64du7cqS1btqinp0dz5szRmTNn7JoXXnhBL730ktauXas9e/bI7XZr9uzZam9vt2uWLl2qTZs2qaSkRDt27NDp06eVnZ0tn89n1+Tn56u2tlaVlZWqrKxUbW2tCgoKhvP0gCtCbm6uSktLB7yEW1paqtzc3EC3CIxOZgQ1NzcbSWb79u3GGGN6e3uN2+02q1atsms6OzuNZVlm/fr1xhhjWltbTXh4uCkpKbFrjh07ZkJDQ01lZaUxxpgDBw4YSWbnzp12jcfjMZLMwYMHB9Wb1+s1kozX6/3G5wlcjnp6eszWrVvNW2+9ZbZu3Wp6enoC3RIw4oaSBSP6EJHX65UkTZgwQZLU0NCgpqYmzZkzx65xuVyaNWuWqqurJUk1NTU6e/asX01CQoJSUlLsGo/HI8uyNGPGDLsmLS1NlmXZNRfq6upSW1ub3wIEs7CwMGVmZurBBx9UZmYml22BrzFiAWqM0ZNPPqnbb79dKSkpkqSmpiZJUlxcnF9tXFycva2pqUkREREaP378RWsGetE7NjbWrrlQcXGxfb/UsixNnjz5m50gACCojFiALl68WH/+85/19ttv99t24cvaxpivfYH7wpqB6i+2n8LCQnm9Xns5cuTIYE4DAABJIxSgS5Ys0bvv
vqutW7dq0qRJ9nq32y1J/UaJzc3N9qjU7Xaru7tbLS0tF605ceJEv+OePHmy3+i2j8vlUnR0tN8CAMBgDWuAGmO0ePFilZWV6X//93+VmJjotz0xMVFut1tbtmyx13V3d2v79u1KT0+XJE2fPl3h4eF+NY2Njaqvr7drZs6cKa/Xq927d9s1u3btktfrtWsAALiUhnUihccff1xvvfWW/ud//kdRUVH2SNOyLEVGRiokJERLly5VUVGRkpOTlZycrKKiIo0dO1b5+fl27cMPP6xly5Zp4sSJmjBhgpYvX67U1FTdddddkqSpU6dq7ty5euSRR7RhwwZJ0sKFC5Wdna0bbrhhOE8RABCshvNxYEkDLr/61a/smt7eXvPMM88Yt9ttXC6XueOOO0xdXZ3ffjo6OszixYvNhAkTTGRkpMnOzjaffvqpX82pU6fMggULTFRUlImKijILFiwwLS0tg+6V11gAAEPJghBjjAlcfI8eQ/kKOXAl8vl8qqqqsqfyy8jI4FUWBJ2hZAGTyQNQWVmZkpKSlJWVpfz8fGVlZSkpKUllZWWBbg0YtZhMHghyZWVlysvL0z333KOnnnpKkZGR6ujo0O9//3vl5eUxnR/wFbiEew6XcBGMfD6fkpKSFBMTo5MnT+qTTz6xt1133XW65pprdOrUKR06dIjLuQgKXMIFMChVVVU6fPiw9u7dq5tuusnvg9o33XST9u7dq4aGBlVVVQW6VWDUIUCBIHbs2DFJ0rx581ReXq60tDRdffXVSktLU3l5uebNm+dXB+A8AhQIYidPnpT0xSfNjDHatm2b3n77bW3btk3GGOXk5PjVATiPh4iAIHbNNddIktatW6fnnnuu3z3Qvi8n9dUBOI8RKBDEvv3tb0uS9u3bp87OTm3cuFHHjx/Xxo0b1dnZqX379vnVATiPp3DP4SlcBKPu7m6NGzdO48aN0/jx43X48GF7W2Jiov7v//5PZ86c0ZkzZxQRERG4RoERMpQs4BIuEMSqq6vV09Mjr9erjIwMLV++3H4PtLKyUhUVFXZdZmZmYJsFRhkCFAhijY2NkqQ33nhD//zP/2wHpvTFCPSNN97QT37yE7sOwHkEKBDE4uPjJUnXX3+9Pvroo35z4fZ9IrCvDsB53AM9h3ugCEZ9MxGlpqaqvLxcoaHnnyvs7e1VTk6O6uvrmYkIQYOZiAAMSlhYmNasWaOKigrl5OT4zUSUk5OjiooKrV69mvAEBsAlXCDI5ebmqrS0VE8++aTS09Pt9VOmTGEieeAiGIEC0M6dO3X06FG/dUeOHNHOnTsD1BEw+hGgQJB7+umn9eKLLyomJkavvfaaGhsb9dprrykmJkYvvviinn766UC3CIxKPER0Dg8RIRj1TaQwceJEHT16VGPGnL+r09PTo0mTJunUqVNMpICgwUNEAAZl3bp16unp0XPPPecXnpI0ZswY/fKXv1RPT4/WrVsXoA6B0YsABYLYxx9/LEnKzs4ecHvf+r46AOcRoEAQu/766yXJbwaiL+tb31cH4DzugZ7DPVAEI+6BAv64BwpgUCIiIvSzn/1MJ06c0KRJk/w+ZzZp0iSdOHFCP/vZzwhPYABMpAAEuRdeeEGS9PLLL+vRRx+1148ZM0ZPPfWUvR2APy7hnsMlXAS77u5urVu3Th9//LGuv/56PfbYY4w8EXSGkgUE6DkEKACAe6AAAAwzAhQAAAcIUAAAHCBAAQBwgAAFAMABAhQAAAcIUAAAHCBAAQBwYFgD9E9/+pPuvfdeJSQkKCQkROXl5X7bjTFauXKlEhISFBkZqczMTO3fv9+vpqurS0uWLFFMTIzGjRun+fPn6+jRo341LS0tKigokGVZsixLBQUFam1tHc5TAwAEuWEN0DNnzmjatGlau3btgNtfeOEFvfTSS1q7dq327Nkjt9ut2bNnq7293a5ZunSpNm3apJKSEu3YsUOnT59Wdna2fD6fXZOfn6/a2lpVVlaqsrJStbW1Kigo
GM5TAwAEOzNCJJlNmzbZf/f29hq3221WrVplr+vs7DSWZZn169cbY4xpbW014eHhpqSkxK45duyYCQ0NNZWVlcYYYw4cOGAkmZ07d9o1Ho/HSDIHDx4cdH9er9dIMl6v1+kpAgAuc0PJgoDdA21oaFBTU5PmzJljr3O5XJo1a5aqq6slSTU1NTp79qxfTUJCglJSUuwaj8cjy7I0Y8YMuyYtLU2WZdk1A+nq6lJbW5vfAgDAYAUsQJuamiRJcXFxfuvj4uLsbU1NTYqIiND48eMvWhMbG9tv/7GxsXbNQIqLi+17ppZlafLkyd/ofIDLnc/n07Zt2/T2229r27ZtfrdJAPQX8KdwQ0JC/P42xvRbd6ELawaq/7r9FBYWyuv12suRI0eG2Dlw5SgrK1NSUpKysrKUn5+vrKwsJSUlqaysLNCtAaNWwALU7XZLUr9RYnNzsz0qdbvd6u7uVktLy0VrTpw40W//J0+e7De6/TKXy6Xo6Gi/BQhGZWVlysvLU2pqqjwej9rb2+XxeJSamqq8vDxCFPgKAQvQxMREud1ubdmyxV7X3d2t7du3Kz09XZI0ffp0hYeH+9U0Njaqvr7erpk5c6a8Xq92795t1+zatUter9euATAwn8+nZcuWKTs7W+Xl5UpLS9PVV1+ttLQ0lZeXKzs7W8uXL+dyLjCAMcO589OnT+ujjz6y/25oaFBtba0mTJiga6+9VkuXLlVRUZGSk5OVnJysoqIijR07Vvn5+ZIky7L08MMPa9myZZo4caImTJig5cuXKzU1VXfddZckaerUqZo7d64eeeQRbdiwQZK0cOFCZWdn64YbbhjO0wMue1VVVTp8+LDefvtthYb6//t0aGioCgsLlZ6erqqqKmVmZgamSWC0Gs7Hgbdu3Wok9VseeughY8wXr7I888wzxu12G5fLZe644w5TV1fnt4+Ojg6zePFiM2HCBBMZGWmys7PNp59+6ldz6tQps2DBAhMVFWWioqLMggULTEtLy5B65TUWBKO33nrLSDLt7e0Dbm9razOSzFtvvTXCnQGBMZQsCDHGmICl9yjS1tYmy7Lk9Xq5H4qgsW3bNmVlZcnj8ej73/++qqqq1NjYqPj4eGVkZGj37t1KT0/X1q1bGYEiKAwlCwjQcwhQBCOfz6ekpCTFxMTo5MmT+uSTT+xt1113na655hqdOnVKhw4dUlhYWAA7BUbGULIg4K+xAAicsLAw/fCHP9TevXvV2dmpjRs36vjx49q4caM6Ozu1d+9e5eXlEZ7AABiBnsMIFMHoyyPQzz77TIcPH7a3JSYmauLEiYxAEVSGkgXD+hQugNHty0/hTps2TU899ZQOHTqk5ORkvfjii6qtreUpXOArEKBAEGtsbJQklZSUKCMjQz09PZKkzZs3a8OGDXr88cf96gCcxz1QIIjFx8dLkl555RX19vb6bevt7dUrr7ziVwfgPAIUCGJf/orR3Llz/abymzt37oB1AL5AgAJBbN26dfY/h4aGyhhjL1+emejLdQC+QIACQWzHjh2Svvg6Ud8c09HR0UpPT9f+/fv1i1/8wq8OwHkEKBDErr76aklffKj+wIEDevzxxzVnzhw9/vjj2r9/v33vs68OwHm8B3oO74EiGG3evFl33323rrrqKnV1denL/3cQEhIil8ulzs5O/eEPf9CcOXMC2CkwMpjKzwECFMHI5/Np7Nix6u7u/sqaiIgIff7550ykgKDAVH4ABsXn8100PKUvvtPL90CB/ghQIIitWbPmktYBwYQABYLY2rVrL2kdEEwIUCCIHT9+/JLWAcGEAAUAwAECFIBtw4YNOnbsmDZs2BDoVoBRj6+xALA9+uijgW4BuGwQoMAV5vPPP9fBgwcv+X4/+OCDr6258cYbNXbs2Et+bGA0IkCBK8zBgwc1ffr0S77fweyzpqZGt9xyyyU/NjAaEaDAFebGG29UTU3NoGqHErSD2eeNN9446P0BlzsCFLjCjB07dtCjQGOM
QkJCBlUHwB9P4QJB7uvCkfAEBkaAApAxRi6Xy2+dy+UiPIGLIEABSJI6Ozvt+5w1NTXq7OwMcEfA6EaAAgDgAAEKAIADPIULjEKdnZ1atGiRvF7viB63tbVVkrRs2TJ961vfGpFjhoeH61/+5V+UmJg4IscDLhUCFBiFGhoa9Otf/1ppaWmaMGHCiB337Nmz9n9+3Ye2LwWfz6fy8nLdd999BCguOwQoMIqtXr1at91224gdr28awJGakq+jo4Op/3DZIkAB2IYyCQMQ7AhQYBTqe//y7rvvHvKlzavCevXrNf+s706dOhytDejkZ5/p5z//ueoaO9XpG/yziX2XjD///PPhag0YNldUgK5bt04vvviiGhsb9b3vfU//+q//qoyMjEC3BQxZe3u7JOnMmTOqr68f0m9vdofqu1WLpKrh6Gxg10j6fzOkWzacVn1T75B/f+DAgUvfFDDMrpgA/a//+i8tXbpU69at02233aYNGzZo3rx5OnDggK699tpAtwcMSd8I9NFHH9Xtt98+pN+G9Xbrve4mR8ftPntWn332mWJiYhQRHj7k3z/1L275QiMGXd/V1aV//Md/1K233jrkYwGBFmKukLm6ZsyYoVtuuUWvvvqqvW7q1KnKyclRcXHx1/6+ra1NlmXJ6/UqOjp6OFsFvlZZWZnuv//+QLcxIsLCwvTHP/5RmZmZgW4FGFIWXBEj0O7ubtXU1OgXv/iF3/o5c+aourp6wN90dXWpq6vL/rutrW1YewSG4r777pPX61V4eLhCQ0duvpN9+/Zp5syZ8ng8uvnmm0fkmKGhoQp3MNoFAu2KCNDPPvtMPp9PcXFxfuvj4uLU1DTwpazi4mI9++yzI9EeMGRhYWGOr4T0vYrixMcff2z/Z0TE4C/F9hmp11+A0eCKCNA+F37X8GLfOiwsLNSTTz5p/93W1qbJkycPa3/ASDh48OCQPpQ9kJ/85CeOfldTU8NrMAgaV0SAxsTEKCwsrN9os7m5ud+otI/L5er3+SbgSnDjjTfaX1UZqo6ODh0+fFhTpkxRZGSko2MDweKKCNCIiAhNnz5dW7Zs0T/8wz/Y67ds2aL77rsvgJ0BI++bToYwkjMfAZezKyJAJenJJ59UQUGBbr31Vs2cOVMbN27Up59+qkWLFgW6NQDAFeiKCdAf/ehHOnXqlH75y1+qsbFRKSkpeu+993TdddcFujUAwBXoinkP9JviPVAAwFCygA9qAwDgAAEKAIADBCgAAA4QoAAAOECAAgDgAAEKAIADBCgAAA4QoAAAOECAAgDgAAEKAIADBCgAAA4QoAAAOECAAgDgAAEKAIADBCgAAA4QoAAAOECAAgDgAAEKAIADBCgAAA4QoAAAOECAAgDgAAEKAIADBCgAAA4QoAAAOECAAgDgAAEKAIADBCgAAA4QoAAAOECAAgDgAAEKAIADBCgAAA4QoAAAODAm0A0AGB18Pp+qqqrU2Nio+Ph4ZWRkKCwsLNBtAaMWI1AAKisrU1JSkrKyspSfn6+srCwlJSWprKws0K0Bo9awBujzzz+v9PR0jR07Vt/61rcGrPn000917733aty4cYqJidETTzyh7u5uv5q6ujrNmjVLkZGR+va3v61f/vKXMsb41Wzfvl3Tp0/XVVddpe985ztav379cJ0WcEUpKytTXl6eUlNT5fF41N7eLo/Ho9TUVOXl5RGiwFcY1gDt7u7WD3/4Q/3TP/3TgNt9Pp/uuecenTlzRjt27FBJSYneeecdLVu2zK5pa2vT7NmzlZCQoD179ujf//3ftXr1ar300kt2TUNDg37wgx8oIyND+/bt04oVK/TEE0/onXfeGc7TAy57Pp9Py5YtU3Z2tsrLy5WWlqarr75aaWlpKi8vV3Z2tpYvXy6fzxfoVoHRx4yAX/3qV8ayrH7r33vvPRMaGmqOHTtmr3v77beNy+UyXq/XGGPMunXrjGVZprOz064pLi42CQkJpre31xhjzNNPP21uvPFG
v30/+uijJi0tbdA9er1eI8k+LhAMtm7daiQZj8cz4Pbq6mojyWzdunVkGwMCZChZENB7oB6PRykpKUpISLDX3X333erq6lJNTY1dM2vWLLlcLr+a48eP6/Dhw3bNnDlz/PZ99913a+/evTp79uyAx+7q6lJbW5vfAgSbxsZGSVJKSsqA2/vW99UBOC+gAdrU1KS4uDi/dePHj1dERISampq+sqbv76+r6enp0WeffTbgsYuLi2VZlr1Mnjz5kpwTcDmJj4+XJNXX1w+4vW99Xx2A84YcoCtXrlRISMhFl7179w56fyEhIf3WGWP81l9YY849QDTUmi8rLCyU1+u1lyNHjgy6Z+BKkZGRoSlTpqioqEi9vb1+23p7e1VcXKzExERlZGQEqENg9Brye6CLFy/Wj3/844vWTJkyZVD7crvd2rVrl9+6lpYWnT171h5Rut1ue6TZp7m5WZK+tmbMmDGaOHHigMd2uVx+l4WBYBQWFqY1a9YoLy9POTk5KiwsVEpKiurr61VcXKyKigqVlpbyPigwgCEHaExMjGJiYi7JwWfOnKnnn3/efnFbkjZv3iyXy6Xp06fbNStWrFB3d7ciIiLsmoSEBDuoZ86cqd/+9rd++968ebNuvfVWhYeHX5JegStVbm6uSktLtWzZMqWnp9vrExMTVVpaqtzc3AB2B4xiw/k00yeffGL27dtnnn32WXP11Vebffv2mX379pn29nZjjDE9PT0mJSXF3HnnneaDDz4wf/zjH82kSZPM4sWL7X20traauLg48+CDD5q6ujpTVlZmoqOjzerVq+2av/3tb2bs2LHmZz/7mTlw4ID5z//8TxMeHm5KS0sH3StP4SLY9fT0mK1bt5q33nrLbN261fT09AS6JWDEDSULhjVAH3roISOp3/LlR+I/+eQTc88995jIyEgzYcIEs3jxYr9XVowx5s9//rPJyMgwLpfLuN1us3LlSvsVlj7btm0zN998s4mIiDBTpkwxr7766pB6JUABAEPJghBjLpjSJ0i1tbXJsix5vV5FR0cHuh0AQAAMJQuYTB6AJCaTB4aKyeQBMJk84AABCgQ5JpMHnOEe6DncA0Uw8vl8SkpKUmpqqsrLyxUaev7fqXt7e5WTk6P6+nodOnSIy7kICkPJAkagQBCrqqrS4cOHtWLFCr/wlKTQ0FAVFhaqoaFBVVVVAeoQGL0IUCCIMZk84BwBCgQxJpMHnCNAgSDGZPKAcwQoEMT6JpOvqKhQTk6O31O4OTk5qqio0OrVq3mACBgAEykAQY7J5AFneI3lHF5jQbBjJiKAqfwAOBAWFqbMzMxAtwFcNrgHCgCAAwQoAAAOEKAAADhAgAIA4AABCgCAAwQoAAAOEKAAADhAgAIA4AABCgCAA8xEdE7fjIZtbW0B7gQAECh9GTCYWW4J0HPa29slSZMnTw5wJwCAQGtvb5dlWRetYTL5c3p7e3X8+HFFRUUpJCQk0O0AAdHW1qbJkyfryJEjfFQBQckYo/b2diUkJCg09OJ3OQlQADa+SgQMHg8RAQDgAAEKAIADBCgAm8vl0jPPPCOXyxXoVoBRj3ugAAA4wAgUAAAHCFAAABwgQAEAcIAABQDAAQIUAAAHCFAA+tOf/qR7771XCQkJCgkJUXl5eaBbAkY9AhSAzpw5o2nTpmnt2rWBbgW4bPA1FgCaN2+e5s2bF+g2gMsKI1AAABwgQAEAcIAABQDAAQIUAAAHCFAAABzgKVwAOn36tD766CP774aGBtXW1mrChAm69tprA9gZMHrxOTMA2rZtm7Kysvqtf+ihh/T666+PfEPAZYAABQDAAe6BAgDgAAEKAIADBCgAAA4QoAAAOECAAgDgAAEKAIADBCgAAA4QoAAAOECAAgDgAAEKAIADBCgAAA78f1LfxloTg7SUAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 500x500 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Notched box plot of distance_day; plt.boxplot returns the drawn artists in a dict.\n",
    "plt.figure(figsize=(5,5),dpi=100)\n",
    "p = plt.boxplot(user1['distance_day'].values, notch=True)\n",
    "\n",
    "# Key step: the 'fliers' artist holds the points plotted as outliers.\n",
    "outlier = p[\"fliers\"][0].get_ydata()\n",
    "\n",
    "print(f\"一共有{len(outlier)}个异常数据\")\n",
    "# max()/min() raise ValueError on an empty array, so only report the range\n",
    "# when the box plot actually flagged at least one outlier.\n",
    "if len(outlier) > 0:\n",
    "    print(f\"异常值的最大值为{outlier.max()}\")\n",
    "    print(f\"异常值的最小值为{outlier.min()}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "id": "575dad00",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>login_day</th>\n",
       "      <th>login_diff_time</th>\n",
       "      <th>distance_day</th>\n",
       "      <th>login_time</th>\n",
       "      <th>launch_time</th>\n",
       "      <th>chinese_subscribe_num</th>\n",
       "      <th>math_subscribe_num</th>\n",
       "      <th>add_friend</th>\n",
       "      <th>add_group</th>\n",
       "      <th>camp_num</th>\n",
       "      <th>learn_num</th>\n",
       "      <th>finish_num</th>\n",
       "      <th>study_num</th>\n",
       "      <th>coupon</th>\n",
       "      <th>course_order_num</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2000001555945280</td>\n",
       "      <td>7</td>\n",
       "      <td>6.86</td>\n",
       "      <td>131</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2000001556645228</td>\n",
       "      <td>4</td>\n",
       "      <td>1.00</td>\n",
       "      <td>81</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2000001558047804</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>179</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2000001558146467</td>\n",
       "      <td>6</td>\n",
       "      <td>1.00</td>\n",
       "      <td>32</td>\n",
       "      <td>24</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2000001558146878</td>\n",
       "      <td>4</td>\n",
       "      <td>1.75</td>\n",
       "      <td>361</td>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135612</th>\n",
       "      <td>2000002947317726</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135613</th>\n",
       "      <td>2000002947317758</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135614</th>\n",
       "      <td>2000002947317827</td>\n",
       "      <td>4</td>\n",
       "      <td>1.00</td>\n",
       "      <td>84</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135615</th>\n",
       "      <td>2000002947317941</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>393</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135616</th>\n",
       "      <td>2000002948014779</td>\n",
       "      <td>1</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>135617 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 user_id  login_day  login_diff_time  distance_day  \\\n",
       "0       2000001555945280          7             6.86           131   \n",
       "1       2000001556645228          4             1.00            81   \n",
       "2       2000001558047804          1             0.00           179   \n",
       "3       2000001558146467          6             1.00            32   \n",
       "4       2000001558146878          4             1.75           361   \n",
       "...                  ...        ...              ...           ...   \n",
       "135612  2000002947317726          1             0.00             0   \n",
       "135613  2000002947317758          1             0.00             0   \n",
       "135614  2000002947317827          4             1.00            84   \n",
       "135615  2000002947317941          1             0.00             0   \n",
       "135616  2000002948014779          1             0.00             0   \n",
       "\n",
       "        login_time  launch_time  chinese_subscribe_num  math_subscribe_num  \\\n",
       "0                1            1                      1                   0   \n",
       "1                3            1                      1                   1   \n",
       "2                3            0                      1                   0   \n",
       "3               24            3                      0                   0   \n",
       "4               39            0                      0                   1   \n",
       "...            ...          ...                    ...                 ...   \n",
       "135612           2            0                      0                   0   \n",
       "135613           2            0                      0                   0   \n",
       "135614           0            0                      0                   0   \n",
       "135615         393            0                      0                   0   \n",
       "135616           4            0                      0                   0   \n",
       "\n",
       "        add_friend  add_group  camp_num  learn_num  finish_num  study_num  \\\n",
       "0                1          1         0          0           0          0   \n",
       "1                1          1         2          1           0          0   \n",
       "2                1          1         2          0           0          0   \n",
       "3                1          1         1          5           5          0   \n",
       "4                1          1         2          0           0          1   \n",
       "...            ...        ...       ...        ...         ...        ...   \n",
       "135612           1          1         1          0           0          0   \n",
       "135613           1          1         1          0           0          0   \n",
       "135614           1          1         1          0           0          0   \n",
       "135615           1          1         1          0           0          0   \n",
       "135616           1          1         2          0           0          0   \n",
       "\n",
       "        coupon  course_order_num  \n",
       "0            0                 4  \n",
       "1            0                 0  \n",
       "2            0                 0  \n",
       "3            0                 1  \n",
       "4            0                 0  \n",
       "...        ...               ...  \n",
       "135612       0                 0  \n",
       "135613       0                 0  \n",
       "135614       0                 0  \n",
       "135615       0                 0  \n",
       "135616       0                 0  \n",
       "\n",
       "[135617 rows x 16 columns]"
      ]
     },
     "execution_count": 136,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# Negative distance_day values are treated as anomalies and imputed with the median.\n",
    "negative_mask = user1['distance_day'] < 0\n",
    "\n",
    "# Compute the median over the non-negative rows only, so the anomalous values\n",
    "# being replaced do not bias the imputed value.\n",
    "# NOTE: the name median_age is kept unchanged in case later cells reference it;\n",
    "# it holds the median of distance_day, not an age.\n",
    "median_age = np.median(user1.loc[~negative_mask, 'distance_day'])\n",
    "\n",
    "# Replace the negative values in user1['distance_day'] with that median\n",
    "user1.loc[negative_mask, 'distance_day'] = median_age\n",
    "\n",
    "user1"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9250c1af",
   "metadata": {},
   "source": [
    "# 数据合并"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "id": "d66bcee0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>login_day</th>\n",
       "      <th>login_diff_time</th>\n",
       "      <th>distance_day</th>\n",
       "      <th>login_time</th>\n",
       "      <th>launch_time</th>\n",
       "      <th>chinese_subscribe_num</th>\n",
       "      <th>math_subscribe_num</th>\n",
       "      <th>add_friend</th>\n",
       "      <th>add_group</th>\n",
       "      <th>...</th>\n",
       "      <th>video_read</th>\n",
       "      <th>next_nize</th>\n",
       "      <th>answer_task</th>\n",
       "      <th>chapter_module</th>\n",
       "      <th>course_tab</th>\n",
       "      <th>slide_subscribe</th>\n",
       "      <th>baby_info</th>\n",
       "      <th>click_notunlocked</th>\n",
       "      <th>share</th>\n",
       "      <th>click_dialog</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2000001563151338</td>\n",
       "      <td>5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>32</td>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2000001563163750</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>84</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2000001563266119</td>\n",
       "      <td>5</td>\n",
       "      <td>0.8</td>\n",
       "      <td>22</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>34</td>\n",
       "      <td>42</td>\n",
       "      <td>15</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2000001566046975</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>84</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2000001566153564</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>84</td>\n",
       "      <td>228</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>12</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4634</th>\n",
       "      <td>2000002940317890</td>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>11</td>\n",
       "      <td>271</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>32</td>\n",
       "      <td>35</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4635</th>\n",
       "      <td>2000002941209916</td>\n",
       "      <td>2</td>\n",
       "      <td>0.5</td>\n",
       "      <td>28</td>\n",
       "      <td>134</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4636</th>\n",
       "      <td>2000002941212854</td>\n",
       "      <td>2</td>\n",
       "      <td>0.5</td>\n",
       "      <td>23</td>\n",
       "      <td>161</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>18</td>\n",
       "      <td>21</td>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "      <td>13</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4637</th>\n",
       "      <td>2000002943617210</td>\n",
       "      <td>2</td>\n",
       "      <td>0.5</td>\n",
       "      <td>24</td>\n",
       "      <td>47</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>22</td>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4638</th>\n",
       "      <td>2000002943810189</td>\n",
       "      <td>2</td>\n",
       "      <td>0.5</td>\n",
       "      <td>1</td>\n",
       "      <td>23</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>25</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4639 rows × 49 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               user_id  login_day  login_diff_time  distance_day  login_time  \\\n",
       "0     2000001563151338          5              1.4            32          16   \n",
       "1     2000001563163750          4              1.0            84           0   \n",
       "2     2000001563266119          5              0.8            22          10   \n",
       "3     2000001566046975          4              1.0            84           0   \n",
       "4     2000001566153564          1              0.0            84         228   \n",
       "...                ...        ...              ...           ...         ...   \n",
       "4634  2000002940317890          2              1.0            11         271   \n",
       "4635  2000002941209916          2              0.5            28         134   \n",
       "4636  2000002941212854          2              0.5            23         161   \n",
       "4637  2000002943617210          2              0.5            24          47   \n",
       "4638  2000002943810189          2              0.5             1          23   \n",
       "\n",
       "      launch_time  chinese_subscribe_num  math_subscribe_num  add_friend  \\\n",
       "0               1                      1                   1           1   \n",
       "1               0                      1                   0           1   \n",
       "2               0                      0                   0           1   \n",
       "3               0                      0                   0           1   \n",
       "4               0                      1                   0           1   \n",
       "...           ...                    ...                 ...         ...   \n",
       "4634            0                      1                   0           1   \n",
       "4635            1                      1                   0           1   \n",
       "4636            1                      1                   0           1   \n",
       "4637            0                      0                   0           1   \n",
       "4638            0                      0                   0           1   \n",
       "\n",
       "      add_group  ...  video_read  next_nize  answer_task  chapter_module  \\\n",
       "0             1  ...           2          0            0               7   \n",
       "1             1  ...           0          0            0               0   \n",
       "2             1  ...          34         42           15               0   \n",
       "3             1  ...           0          0            0               0   \n",
       "4             1  ...          12          8            0              21   \n",
       "...         ...  ...         ...        ...          ...             ...   \n",
       "4634          1  ...          32         35            1               5   \n",
       "4635          1  ...           2          0            0              23   \n",
       "4636          1  ...          18         21            7               4   \n",
       "4637          1  ...          22         20            0               2   \n",
       "4638          1  ...          25         14            0               2   \n",
       "\n",
       "      course_tab  slide_subscribe  baby_info click_notunlocked  share  \\\n",
       "0              1                2          5                 0      0   \n",
       "1              0                0          0                 0      0   \n",
       "2              1                5          5                 0      1   \n",
       "3              0                0          0                 0      0   \n",
       "4              3                1          3                14      0   \n",
       "...          ...              ...        ...               ...    ...   \n",
       "4634           0                2          0                 0      1   \n",
       "4635           2                2          0                 0      1   \n",
       "4636          13                1          0                 3      6   \n",
       "4637           0                0          5                 0      4   \n",
       "4638           0                0          0                 0      5   \n",
       "\n",
       "      click_dialog  \n",
       "0                1  \n",
       "1                0  \n",
       "2                4  \n",
       "3                0  \n",
       "4                0  \n",
       "...            ...  \n",
       "4634             1  \n",
       "4635             1  \n",
       "4636             0  \n",
       "4637             1  \n",
       "4638             0  \n",
       "\n",
       "[4639 rows x 49 columns]"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#将处理后的数据合并，得到下单用户的所有信息，可以根据这个表来进行分析下单用户的特征\n",
    "user12 = pd.merge(user1, user2, on='user_id')\n",
    "user123 = pd.merge(user12, user3, on='user_id')\n",
    "user = pd.merge(user123, user4, on='user_id')\n",
    "\n",
    "user"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c948bf8",
   "metadata": {},
   "source": [
    "# 数据导出"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "id": "6ff2e680",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write each processed table, plus the merged table, to the processed-data folder.\n",
    "# NOTE(review): to_csv writes the row index as an unnamed first column by default;\n",
    "# pass index=False if downstream readers should not receive that column.\n",
    "export_targets = [\n",
    "    (user1, r\"C:\\Users\\阿璃\\Desktop\\数据分析大作业项目题目（三选一）\\B题\\处理好的数据\\login_day1.csv\"),\n",
    "    (user2, r\"C:\\Users\\阿璃\\Desktop\\数据分析大作业项目题目（三选一）\\B题\\处理好的数据\\result1.csv\"),\n",
    "    (user3, r\"C:\\Users\\阿璃\\Desktop\\数据分析大作业项目题目（三选一）\\B题\\处理好的数据\\user_info1.csv\"),\n",
    "    (user4, r\"C:\\Users\\阿璃\\Desktop\\数据分析大作业项目题目（三选一）\\B题\\处理好的数据\\visit_info1.csv\"),\n",
    "    (user, r\"C:\\Users\\阿璃\\Desktop\\数据分析大作业项目题目（三选一）\\B题\\处理好的数据\\user.csv\"),\n",
    "]\n",
    "for frame, csv_path in export_targets:\n",
    "    frame.to_csv(csv_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5f26d44",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14b35253",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
